From d57c367eec0d8edee70d40f2687e91cfe9b2e24e Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 11:31:55 +0100 Subject: [PATCH 001/120] enh(preprocessing): Add split_markdown_by_headings. --- docs/step-by-step-guide.md | 4 +- pyproject.toml | 1 - src/structured_qa/preprocessing.py | 74 ++++++++++++++++++++++++------ tests/unit/test_preprocessing.py | 46 ++++++++++++++++++- 4 files changed, 107 insertions(+), 18 deletions(-) diff --git a/docs/step-by-step-guide.md b/docs/step-by-step-guide.md index 27f2c82..6f7d06f 100644 --- a/docs/step-by-step-guide.md +++ b/docs/step-by-step-guide.md @@ -34,9 +34,7 @@ The document is first converted to markdown and then split into sections based o **Section Splitting** - - Uses [langchain-text-splitters](https://pypi.org/project/langchain-text-splitters/) - - - Splits on `("#", "Header 1"), ("##", "Header 2"), ("###", "Header 3")` + - Uses [split_markdown_by_headings](api.md/#structured_qa.preprocessing.split_markdown_by_headings) - Each section is saved to a separate file. diff --git a/pyproject.toml b/pyproject.toml index d5f5bb9..75d203c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,6 @@ dependencies = [ "huggingface-hub", "llama-cpp-python", "loguru", - "langchain-text-splitters", "pydantic", "pymupdf4llm", "pyyaml", diff --git a/src/structured_qa/preprocessing.py b/src/structured_qa/preprocessing.py index c673f25..f85d701 100644 --- a/src/structured_qa/preprocessing.py +++ b/src/structured_qa/preprocessing.py @@ -1,18 +1,70 @@ +import re +from collections import defaultdict from pathlib import Path import pymupdf4llm -from langchain_text_splitters import MarkdownHeaderTextSplitter from loguru import logger +def split_markdown_by_headings( + markdown_text, heading_patterns: list[str] | None = None +) -> dict[str, str]: + """Splits a markdown document into sections based on specified heading patterns. + + Args: + markdown_text (str): The markdown document as a single string. 
+ heading_patterns (str, optional): A list of regex patterns representing heading markers + in the markdown document. + Defaults to None. + If None, the default patterns are used: + + ```python + [ + r"^#\s+(.+)$", + r"^##\s+(.+)$", + r"^###\s+(.+)$", + r"^\*\*[\d\.]+\.\*\*\s*\*\*(.+)\*\*$", + r"^\*\*[\d\.]+\.\*\*\s+(.+)$" + ] + ``` + + Returns: + dict[str, str]: A dictionary where the keys are the section names and the values are the section contents. + """ + if heading_patterns is None: + heading_patterns = [ + r"^#\s+(.+)$", + r"^##\s+(.+)$", + r"^###\s+(.+)$", + r"^####\s+(.+)$", + r"^\*\*[\d\.]+\.\*\*\s*\*\*(.+)\*\*$", + ] + + sections = defaultdict(str) + + heading_text = "INTRO" + for line in markdown_text.splitlines(): + line = line.strip() + if not line: + continue + for pattern in heading_patterns: + match = re.match(pattern, line) + if match: + heading_text = match.group(1)[:100] + break + sections[heading_text] += f"{line}\n" + + return sections + + @logger.catch(reraise=True) def document_to_sections_dir(input_file: str, output_dir: str) -> list[str]: """ Convert a document to a directory of sections. Uses [pymupdf4llm](https://pypi.org/project/pymupdf4llm/) to convert input_file to markdown. - Then uses [langchain_text_splitters](https://pypi.org/project/langchain-text-splitters/) to split the markdown into sections based on the headers. + Then uses [`split_markdown_by_headings`][structured_qa.preprocessing.split_markdown_by_headings] to split the markdown into sections based on the headers. Args: input_file: Path to the input document. 
@@ -32,27 +84,23 @@ def document_to_sections_dir(input_file: str, output_dir: str) -> list[str]: logger.info(f"Converting {input_file}") md_text = pymupdf4llm.to_markdown(input_file) + Path("debug.md").write_text(md_text) logger.success("Converted") logger.info("Extracting sections") - splitter = MarkdownHeaderTextSplitter( - headers_to_split_on=[("#", "Header 1"), ("##", "Header 2"), ("###", "Header 3")] + sections = split_markdown_by_headings( + md_text, ) - sections = splitter.split_text(md_text) logger.success(f"Found {len(sections)} sections") logger.info(f"Writing sections to {output_dir}") output_dir = Path(output_dir) output_dir.mkdir(exist_ok=True, parents=True) - section_names = [] - for section in sections: - if not section.metadata: - continue - section_name = list(section.metadata.values())[-1].lower() - section_names.append(section_name) + + for section_name, section_content in sections.items(): (output_dir / f"{section_name.replace('/', '_')}.txt").write_text( - section.page_content + section_content ) logger.success("Done") - return section_names + return sections.keys() diff --git a/tests/unit/test_preprocessing.py b/tests/unit/test_preprocessing.py index 627bdd1..8b3fd05 100644 --- a/tests/unit/test_preprocessing.py +++ b/tests/unit/test_preprocessing.py @@ -1,3 +1,6 @@ +import pytest + +from structured_qa.preprocessing import split_markdown_by_headings from structured_qa.preprocessing import document_to_sections_dir @@ -6,4 +9,45 @@ def test_document_to_sections_dir(tmp_path, example_data): document_to_sections_dir(example_data / "1706.03762v7.pdf", output_dir) sections = list(output_dir.iterdir()) assert all(section.is_file() and section.suffix == ".txt" for section in sections) - assert len(sections) == 10 + assert len(sections) == 12 + + +DEFAULT_HEADINGS = """ +# Introduction + +This is the introduction. + +## Related Work + +This is the related work. + +### Method + +This is the method. 
+""" + +NUMERIC_HEADINGS = """ +**1.** **Introduction** + +This is the introduction. + +**2.** **Related Work** + +This is the related work. + +**2.1** **Method** + +This is the method. +""" + + +@pytest.mark.parametrize( + ("markdown_text", "n_sections"), + ( + (DEFAULT_HEADINGS, 3), + (NUMERIC_HEADINGS, 2), + ), +) +def test_split_markdown_by_headings(markdown_text, n_sections): + sections = split_markdown_by_headings(markdown_text) + assert len(sections) == n_sections From fe93f7426da8c9f7c21c59b4a93f4e367108b6b5 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 13:41:54 +0100 Subject: [PATCH 002/120] Add benchmark --- benchmark/gemini.py | 80 +++++++++++++++++++++++++++++++++++++ benchmark/run_benchmark.py | 43 ++++++++++++++++++++ benchmark/structured_qa.csv | 61 ++++++++++++++++++++++++++++ 3 files changed, 184 insertions(+) create mode 100644 benchmark/gemini.py create mode 100644 benchmark/run_benchmark.py create mode 100644 benchmark/structured_qa.csv diff --git a/benchmark/gemini.py b/benchmark/gemini.py new file mode 100644 index 0000000..d742fea --- /dev/null +++ b/benchmark/gemini.py @@ -0,0 +1,80 @@ +import datetime +import json +import os +import time + +import google.generativeai as genai +from loguru import logger + +SYSTEM_PROMPT = """ +You are given an input document and a question. +You can only answer the question based on the information in the document. +You will return a JSON name with two keys: "section" and "answer". +In `"section"`, you will return the name of the section where you found the answer. +In `"answer"`, you will return the answer either as Yes/No (for boolean questions) or as a single number (for numeric questions). +Example response: +{ + "section": "1. 
Introduction", + "answer": "No" +} +""" + + +def gemini_process_document(document_file, document_data): + genai.configure(api_key=os.environ["GEMINI_API_KEY"]) + + logger.info("Uploading file") + file = genai.upload_file(document_file, mime_type="application/pdf") + while file.state.name == 'PROCESSING': + logger.debug('Waiting for file to be processed.') + time.sleep(2) + file = genai.get_file(file.name) + + logger.info("Creating cache") + cache =genai.caching.CachedContent.create( + model="models/gemini-1.5-flash-8b-latest", + display_name='cached file', # used to identify the cache + system_instruction=SYSTEM_PROMPT, + contents=[file], + ttl=datetime.timedelta(minutes=15), + ) + + logger.info("Creating model") + model = genai.GenerativeModel.from_cached_content( + cached_content=cache, + generation_config={ + "temperature": 1, + "top_p": 0.95, + "top_k": 40, + "max_output_tokens": 8192, + "response_mime_type": "application/json", + } + ) + + logger.info("Predicting") + n = 0 + answers = {} + sections = {} + for index, row in document_data.iterrows(): + if n > 0 and n % 13 == 0: + logger.info("Waiting for 60 seconds") + time.sleep(60) + question = row["question"] + logger.debug(f"Question: {question}") + chat_session = model.start_chat( + history=[ + { + "role": "user", + "parts": [ + question, + ], + } + ] + ) + + response = chat_session.send_message("INSERT_INPUT_HERE") + logger.debug(response.text) + response_json = json.loads(response.text) + answers[index] = response_json["answer"] + sections[index] = response_json["section"] + n += 1 diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py new file mode 100644 index 0000000..40e32d8 --- /dev/null +++ b/benchmark/run_benchmark.py @@ -0,0 +1,43 @@ +from pathlib import Path +from urllib.request import urlretrieve + +import pandas as pd +from fire import Fire +from loguru import logger + + +from gemini import gemini_process_document + + +def download_document(url, output_file): + if not 
Path(output_file).exists(): + urlretrieve(url, output_file) + logger.debug(f"Downloaded {url} to {output_file}") + else: + logger.debug(f"File {output_file} already exists") + + +@logger.catch(reraise=True) +def run_benchmark(input_data: str, output_file: str, model: str): + logger.info("Loading input data") + data = pd.read_csv(input_data) + data["pred_answer"] = [None] * len(data) + data["pred_section"] = [None] * len(data) + + + for document_link, document_data in data.groupby("document"): + logger.info(f"Downloading document {document_link}") + downloaded_document = Path(f"example_data/{Path(document_link).name}.pdf") + download_document(document_link, downloaded_document) + + if model == "gemini": + answers, sections = gemini_process_document(downloaded_document, document_data) + + for index in document_data.index: + data.loc[index, "pred_answer"] = answers[index] + data.loc[index, "pred_section"] = sections[index] + + data.to_csv(output_file) + +if __name__ == "__main__": + Fire(run_benchmark) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv new file mode 100644 index 0000000..fa04a2f --- /dev/null +++ b/benchmark/structured_qa.csv @@ -0,0 +1,61 @@ +document,section,question,bool_answer,num_answer,multi_choice_answer +https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use a encoder only architecture,0,, +https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use a decoder only architecture,0,, +https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use a encoder-decoder architecture,1,, +https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the encoder,,6, +https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the decoder,,6, +https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many parallel attention heads are used,,8, +https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use learned embeddings for the input and output 
tokens,1,, +https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use learned positional embeddings,0,, +https://arxiv.org/pdf/1706.03762,5 Training,How many GPUs were used for training,,8, +https://arxiv.org/pdf/1706.03762,5 Training,Was the model trained on NVIDIA A100 GPUs,0,, +https://arxiv.org/pdf/1706.03762,5 Training,Was the model trained on NVIDIA P100 GPUs,1,, +https://arxiv.org/pdf/1706.03762,5 Training,Was the SGD optimizer used,0,, +https://arxiv.org/pdf/1706.03762,5 Training,Was the AdamW optimizer used,0,, +https://arxiv.org/pdf/1706.03762,5 Training,Was the Adam optimizer used,1,, +https://arxiv.org/pdf/1706.03762,5 Training,Was a fixed learning rate used,0,, +https://arxiv.org/pdf/1706.03762,5 Training,Was a varied learning rate used,1,, +https://arxiv.org/pdf/1706.03762,5 Training,How many warmup steps were used,,4000, +https://arxiv.org/pdf/1706.03762,5 Training,Was the label dropout regularization used during training,1,, +https://arxiv.org/pdf/1706.03762,5 Training,What was the dropout rate used for the base model,,0.1, +https://arxiv.org/pdf/1706.03762,5 Training,Was the label smoothing regularization used during training,1,, +https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,How many layers are in the toy model (y = x^2)?,,3, +https://arxiv.org/pdf/2210.05190,2.1 Fully Connected Networks,Does the model use Sigmoid activation function?,0,, +https://arxiv.org/pdf/2210.05191,3 Experimental Results,How many parameters are in the y = x^2 toy model tree?,,14, +https://arxiv.org/pdf/2210.05192,2.4 Recurrent Networks,Can recurrent networks also be converted to decision trees?,1,, +https://arxiv.org/pdf/2210.05193,3 Experimental Results,How many layers are in the half-moon neural network?,,3, +https://arxiv.org/pdf/2210.05194,3 Experimental Results,"What is the main computational advantage of decision trees? 
A: Less storage memory, B: Fewer operations, C: Lower accuracy",,,B +https://arxiv.org/pdf/2106.09685v2.pdf,4 Our Method,Does LoRA work with any neural network containing dense layers?,1,, +https://arxiv.org/pdf/2106.09685v2.pdf,5.5 Scaling Up to GPT-3,"How much memory is saved when training GPT-3 175B with LoRA compared to full fine-tuning? A: 850GB, B: 100GB, C: 5GB",,,A +https://arxiv.org/pdf/2106.09685v2.pdf,Abstract,"By how much can LoRA reduce GPU memory requirements during training? A: 10x, B: 5x, C: 3x",,,C +https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,"In billions, how many trainable parameters does GPT-3 have?",,175, +https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,Does LoRA introduce additional inference latency compared to full fine-tuning?,0,, +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Prohibited AI Practices (Article 5),"Which type of AI systems are banned by the AI Act? (A) High-risk systems, (B) Manipulative systems, (C) Real-time biometric systems in public spaces",,,C +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401690,Requirements for High-Risk AI Systems (Article 10),"what is a requirement for datasets used in high-risk AI systems? (A) Exclusively open-source datasets, (B) Datasets ensuring quality and diversity, (C) Datasets not exceeding 1 GB in size",,,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401691,Classification rules (article 51),"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? +A) 10^15, B) 10^20, C) 10^25",,,C +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401692,"TRANSPARENCY OBLIGATIONS FOR PROVIDERS AND DEPLOYERS OF CERTAIN AI SYSTEMS +(Article 50)","What should providers of AI systems that generate synthetic content ensure? +A) That the content is not marked in any way. 
B) That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. C) That there is no way to detect that the content is synthetic.",,,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401693,Sharing of information on serious incidents (article 73),How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? A) 3 days B) 7 days C) 14 days,,,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401694,Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes (Article 60),"What is the maximum duration of testing in real-world conditions? a) 3 months b) 6 months, with a possible extension of an additional 6 months. c) 12 months",,,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401695,Penalties (Article 99),"What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? A) 7,500,000 EUR or 1% of annual turnover, whichever is higher. B) 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher C) 10,000,000 EUR or 5% of annual turnover, whichever is higher",,,A +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401696,Code of practice (article 56),By what date should codes of practice be ready? a) 2 May 2025 b) 2 May 2024 c) 2 August 2025,,,A +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401697,"Compliant AI systems which present a risk (article 82) +",What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
a) 1 month b) 2 months c) Immediately,,,C +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401698,EU declaration of conformity (article 47),"How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? a) 5 years b) 10 years c) 15 years",,,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401699,Establishment and structure of the European Artificial Intelligence Board (article 65),"How long is the term of office for a Member State representative on the European Artificial Intelligence Board? a) 2 years, renewable once b) 3 years, renewable once c) 4 years, renewable once",,,B +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,"According to the guide, what is the typical license used to grant reuse rights with libre open access? a) GNU General Public License b) Creative Commons license c) MIT license",,,B +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,"how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? a) Over 10,000 b) Over 20,000 c) Exactly 30,000",,,A +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 2 Benefits of Open Access,what is the term of office for members of the advisory board of the Authors Alliance? a) The source does not specify a term of office for the advisory board. 
b) 2 years c) 4 years,,,A +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,Does open access eliminate price barriers?,1,, +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chatper 1 What is this guide and who is it for,Are publication fees required for all open access journals?,0,, +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 3 Open Access Policies,In what year did the Bull and Melinda Gates foundation implement an open access policy?,,2015, +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,Are Gold Open Access and Green Open Access mutually exclusive.,0,, +https://arxiv.org/pdf/2201.11903,3 Arithmetic Reasoning,Is Arithmetic reasoning is a task that language models often find very easy?,0,, +https://arxiv.org/pdf/2201.11904,3.1 Experimental Setup,How many large language models were evaluated?,,5, +https://arxiv.org/pdf/2201.11905,3.1 Experimental Setup,How many benchmarks were used to evaluate arithmetic reasoning?,,5, +https://arxiv.org/pdf/2201.11906,5 Symbolic Reasoning,Is symbolic reasoning usually simple for humans but challenging for language models?,1,, +https://arxiv.org/pdf/2201.11907,3.4 Robustness of Chain of Thought,How many annotators provided independent chains of thought?,,3, +https://arxiv.org/pdf/2201.11908,3.2 Results,How many random samples for examined to understand model errors?,,50, +https://arxiv.org/pdf/2201.11909,5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? 
A: Coin Flip, B: Tower of Hanoi, C: Chess puzzles",,,A From 92c70a77360c27cdf683183ac4373d4bdaad32ea Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 13:43:41 +0100 Subject: [PATCH 003/120] Move to structured_qa. Add entrypoint --- pyproject.toml | 1 + {benchmark => src/structured_qa/benchmark}/gemini.py | 12 ++++++------ .../structured_qa/benchmark}/run_benchmark.py | 10 ++++++---- .../structured_qa/benchmark}/structured_qa.csv | 0 4 files changed, 13 insertions(+), 10 deletions(-) rename {benchmark => src/structured_qa/benchmark}/gemini.py (88%) rename {benchmark => src/structured_qa/benchmark}/run_benchmark.py (89%) rename {benchmark => src/structured_qa/benchmark}/structured_qa.csv (100%) diff --git a/pyproject.toml b/pyproject.toml index 75d203c..cdb2c2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,3 +46,4 @@ namespaces = false [project.scripts] structured-qa = "structured_qa.cli:main" +structured-qa-benchmark = "structured_qa.benchmark.run_benchmark:main" diff --git a/benchmark/gemini.py b/src/structured_qa/benchmark/gemini.py similarity index 88% rename from benchmark/gemini.py rename to src/structured_qa/benchmark/gemini.py index d742fea..3efc199 100644 --- a/benchmark/gemini.py +++ b/src/structured_qa/benchmark/gemini.py @@ -21,19 +21,19 @@ def gemini_process_document(document_file, document_data): - genai.configure(api_key=os.environ["GEMINI_API_KEY"]) + genai.configure(api_key=os.environ["GEMINI_API_KEY"]) logger.info("Uploading file") file = genai.upload_file(document_file, mime_type="application/pdf") - while file.state.name == 'PROCESSING': - logger.debug('Waiting for file to be processed.') + while file.state.name == "PROCESSING": + logger.debug("Waiting for file to be processed.") time.sleep(2) file = genai.get_file(file.name) logger.info("Creating cache") - cache =genai.caching.CachedContent.create( + cache = genai.caching.CachedContent.create( model="models/gemini-1.5-flash-8b-latest", - display_name='cached file', # used to 
identify the cache + display_name="cached file", # used to identify the cache system_instruction=SYSTEM_PROMPT, contents=[file], ttl=datetime.timedelta(minutes=15), @@ -48,7 +48,7 @@ def gemini_process_document(document_file, document_data): "top_k": 40, "max_output_tokens": 8192, "response_mime_type": "application/json", - } + }, ) logger.info("Predicting") diff --git a/benchmark/run_benchmark.py b/src/structured_qa/benchmark/run_benchmark.py similarity index 89% rename from benchmark/run_benchmark.py rename to src/structured_qa/benchmark/run_benchmark.py index 40e32d8..9496e03 100644 --- a/benchmark/run_benchmark.py +++ b/src/structured_qa/benchmark/run_benchmark.py @@ -24,20 +24,22 @@ def run_benchmark(input_data: str, output_file: str, model: str): data["pred_answer"] = [None] * len(data) data["pred_section"] = [None] * len(data) - for document_link, document_data in data.groupby("document"): logger.info(f"Downloading document {document_link}") downloaded_document = Path(f"example_data/{Path(document_link).name}.pdf") download_document(document_link, downloaded_document) if model == "gemini": - answers, sections = gemini_process_document(downloaded_document, document_data) + answers, sections = gemini_process_document( + downloaded_document, document_data + ) for index in document_data.index: data.loc[index, "pred_answer"] = answers[index] data.loc[index, "pred_section"] = sections[index] - + data.to_csv(output_file) -if __name__ == "__main__": + +def main(): Fire(run_benchmark) diff --git a/benchmark/structured_qa.csv b/src/structured_qa/benchmark/structured_qa.csv similarity index 100% rename from benchmark/structured_qa.csv rename to src/structured_qa/benchmark/structured_qa.csv From 70ef78561f941ebc6a99d3e46133aead34a89246 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 13:52:39 +0100 Subject: [PATCH 004/120] Move back outside --- benchmark/benchmark.ipynb | 0 {src/structured_qa/benchmark => benchmark}/gemini.py | 0 {src/structured_qa/benchmark 
=> benchmark}/run_benchmark.py | 0 {src/structured_qa/benchmark => benchmark}/structured_qa.csv | 0 pyproject.toml | 1 - 5 files changed, 1 deletion(-) create mode 100644 benchmark/benchmark.ipynb rename {src/structured_qa/benchmark => benchmark}/gemini.py (100%) rename {src/structured_qa/benchmark => benchmark}/run_benchmark.py (100%) rename {src/structured_qa/benchmark => benchmark}/structured_qa.csv (100%) diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb new file mode 100644 index 0000000..e69de29 diff --git a/src/structured_qa/benchmark/gemini.py b/benchmark/gemini.py similarity index 100% rename from src/structured_qa/benchmark/gemini.py rename to benchmark/gemini.py diff --git a/src/structured_qa/benchmark/run_benchmark.py b/benchmark/run_benchmark.py similarity index 100% rename from src/structured_qa/benchmark/run_benchmark.py rename to benchmark/run_benchmark.py diff --git a/src/structured_qa/benchmark/structured_qa.csv b/benchmark/structured_qa.csv similarity index 100% rename from src/structured_qa/benchmark/structured_qa.csv rename to benchmark/structured_qa.csv diff --git a/pyproject.toml b/pyproject.toml index cdb2c2d..75d203c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,4 +46,3 @@ namespaces = false [project.scripts] structured-qa = "structured_qa.cli:main" -structured-qa-benchmark = "structured_qa.benchmark.run_benchmark:main" From 16ff8bd57e6ca076b8f2966c034da05c3f2608a5 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 13:56:09 +0100 Subject: [PATCH 005/120] Fix main --- benchmark/run_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index 9496e03..87786f7 100644 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -41,5 +41,5 @@ def run_benchmark(input_data: str, output_file: str, model: str): data.to_csv(output_file) -def main(): +if __name__ == "__main__": Fire(run_benchmark) From 
539898e82d33cacb50f93919434a4d415dbaa1eb Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:15:58 +0100 Subject: [PATCH 006/120] Update questions --- benchmark/run_benchmark.py | 2 +- benchmark/structured_qa.csv | 15 +++------------ 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index 87786f7..2a26e81 100644 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -26,7 +26,7 @@ def run_benchmark(input_data: str, output_file: str, model: str): for document_link, document_data in data.groupby("document"): logger.info(f"Downloading document {document_link}") - downloaded_document = Path(f"example_data/{Path(document_link).name}.pdf") + downloaded_document = Path(f"{Path(document_link).stem}.pdf") download_document(document_link, downloaded_document) if model == "gemini": diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index fa04a2f..39c40e0 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -1,24 +1,15 @@ document,section,question,bool_answer,num_answer,multi_choice_answer -https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use a encoder only architecture,0,, -https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use a decoder only architecture,0,, -https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use a encoder-decoder architecture,1,, +https://arxiv.org/pdf/1706.03762,3 Model Architecture,What type of architecture does the model use?-A: decoder only -B: encoder only -C: encoder-decoder,,,C https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the encoder,,6, https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the decoder,,6, https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many parallel attention heads are used,,8, https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use learned embeddings for 
the input and output tokens,1,, https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use learned positional embeddings,0,, https://arxiv.org/pdf/1706.03762,5 Training,How many GPUs were used for training,,8, -https://arxiv.org/pdf/1706.03762,5 Training,Was the model trained on NVIDIA A100 GPUs,0,, -https://arxiv.org/pdf/1706.03762,5 Training,Was the model trained on NVIDIA P100 GPUs,1,, -https://arxiv.org/pdf/1706.03762,5 Training,Was the SGD optimizer used,0,, -https://arxiv.org/pdf/1706.03762,5 Training,Was the AdamW optimizer used,0,, -https://arxiv.org/pdf/1706.03762,5 Training,Was the Adam optimizer used,1,, -https://arxiv.org/pdf/1706.03762,5 Training,Was a fixed learning rate used,0,, -https://arxiv.org/pdf/1706.03762,5 Training,Was a varied learning rate used,1,, +https://arxiv.org/pdf/1706.03762,5 Training,Was type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4,,,B +https://arxiv.org/pdf/1706.03762,5 Training,Was optimizer was used? 
-A: AdamW -B: Adam -C: SGD,,,A https://arxiv.org/pdf/1706.03762,5 Training,How many warmup steps were used,,4000, -https://arxiv.org/pdf/1706.03762,5 Training,Was the label dropout regularization used during training,1,, https://arxiv.org/pdf/1706.03762,5 Training,What was the dropout rate used for the base model,,0.1, -https://arxiv.org/pdf/1706.03762,5 Training,Was the label smoothing regularization used during training,1,, https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,How many layers are in the toy model (y = x^2)?,,3, https://arxiv.org/pdf/2210.05190,2.1 Fully Connected Networks,Does the model use Sigmoid activation function?,0,, https://arxiv.org/pdf/2210.05191,3 Experimental Results,How many parameters are in the y = x^2 toy model tree?,,14, From ed71947aa17645357869fa915696e41f7a2f6b85 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:22:28 +0100 Subject: [PATCH 007/120] Update model and prompt --- benchmark/gemini.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/benchmark/gemini.py b/benchmark/gemini.py index 3efc199..0b54610 100644 --- a/benchmark/gemini.py +++ b/benchmark/gemini.py @@ -1,4 +1,3 @@ -import datetime import json import os import time @@ -11,7 +10,7 @@ You can only answer the question based on the information in the document. You will return a JSON name with two keys: "section" and "answer". In `"section"`, you will return the name of the section where you found the answer. -In `"answer"`, you will return the answer either as Yes/No (for boolean questions) or as a single number (for numeric questions). +In `"answer"`, you will return the answer either as Yes/No (for boolean questions) or as a single number (for numeric questions) or as a single letter (for multi-choice questions). Example response: { "section": "1. 
Introduction", @@ -30,18 +29,9 @@ def gemini_process_document(document_file, document_data): time.sleep(2) file = genai.get_file(file.name) - logger.info("Creating cache") - cache = genai.caching.CachedContent.create( - model="models/gemini-1.5-flash-8b-latest", - display_name="cached file", # used to identify the cache - system_instruction=SYSTEM_PROMPT, - contents=[file], - ttl=datetime.timedelta(minutes=15), - ) - logger.info("Creating model") - model = genai.GenerativeModel.from_cached_content( - cached_content=cache, + model = genai.GenerativeModel( + model_name="gemini-2.0-flash-exp", generation_config={ "temperature": 1, "top_p": 0.95, @@ -66,6 +56,7 @@ def gemini_process_document(document_file, document_data): { "role": "user", "parts": [ + file, question, ], } From fd4fb9519696a9a15e5fef5b76b94f568cc66353 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:23:57 +0100 Subject: [PATCH 008/120] Update --- benchmark/gemini.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/benchmark/gemini.py b/benchmark/gemini.py index 0b54610..5e8157b 100644 --- a/benchmark/gemini.py +++ b/benchmark/gemini.py @@ -11,11 +11,19 @@ You will return a JSON name with two keys: "section" and "answer". In `"section"`, you will return the name of the section where you found the answer. In `"answer"`, you will return the answer either as Yes/No (for boolean questions) or as a single number (for numeric questions) or as a single letter (for multi-choice questions). -Example response: +Example responses: { "section": "1. Introduction", "answer": "No" } +{ + "section": "1. Introduction", + "answer": 2 +} +{ + "section": "1. 
Introduction", + "answer": "C" +} """ From 5add514335151a6c318c4ec4976415eab6528764 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:31:26 +0100 Subject: [PATCH 009/120] Update --- benchmark/gemini.py | 18 +++++++++++++----- benchmark/structured_qa.csv | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/benchmark/gemini.py b/benchmark/gemini.py index 5e8157b..f0a731a 100644 --- a/benchmark/gemini.py +++ b/benchmark/gemini.py @@ -10,18 +10,26 @@ You can only answer the question based on the information in the document. You will return a JSON name with two keys: "section" and "answer". In `"section"`, you will return the name of the section where you found the answer. -In `"answer"`, you will return the answer either as Yes/No (for boolean questions) or as a single number (for numeric questions) or as a single letter (for multi-choice questions). -Example responses: +In `"answer"`, you will return the answer one of the following JSON: +- Yes/No (for boolean questions) +Is the model an LLM? { "section": "1. Introduction", "answer": "No" } +- Single number (for numeric questions) +How many layers does the model have? { - "section": "1. Introduction", - "answer": 2 + "section": "2. Architecture", + "answer": 12 } +- Single letter (for multiple-choice questions) +What is the activation function used in the model? +-A: ReLU +-B: Sigmoid +-C: Tanh { - "section": "1. Introduction", + "section": "2. Architecture", "answer": "C" } """ diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 39c40e0..21b8843 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -1,5 +1,5 @@ document,section,question,bool_answer,num_answer,multi_choice_answer -https://arxiv.org/pdf/1706.03762,3 Model Architecture,What type of architecture does the model use?-A: decoder only -B: encoder only -C: encoder-decoder,,,C +https://arxiv.org/pdf/1706.03762,3 Model Architecture,"What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder,,,C https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the encoder,,6, https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the decoder,,6, https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many parallel attention heads are used,,8, From 9f8c7559193692d0e224bf42ebd330ed87798815 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:32:16 +0100 Subject: [PATCH 010/120] fix --- benchmark/structured_qa.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 21b8843..227c99d 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -1,5 +1,5 @@ document,section,question,bool_answer,num_answer,multi_choice_answer -https://arxiv.org/pdf/1706.03762,3 Model Architecture,"What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder,,,C +https://arxiv.org/pdf/1706.03762,3 Model Architecture,What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder,,,C https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the encoder,,6, https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the decoder,,6, https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many parallel attention heads are used,,8, From bec2ef148cff4037359c631e234c00caa46034cf Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:34:29 +0100 Subject: [PATCH 011/120] Add system_instruction --- benchmark/gemini.py | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmark/gemini.py b/benchmark/gemini.py index f0a731a..ca24e46 100644 --- a/benchmark/gemini.py +++ b/benchmark/gemini.py @@ -55,6 +55,7 @@ def gemini_process_document(document_file, document_data): "max_output_tokens": 8192, "response_mime_type": "application/json", }, + system_instruction=SYSTEM_PROMPT, ) logger.info("Predicting") From 08cad02a3da2c940415949d1cd21443519cb5024 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:41:57 +0100 Subject: [PATCH 012/120] Update ratio --- benchmark/gemini.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/gemini.py b/benchmark/gemini.py index ca24e46..d6a72ca 100644 --- a/benchmark/gemini.py +++ b/benchmark/gemini.py @@ -63,7 +63,7 @@ def gemini_process_document(document_file, document_data): answers = {} sections = {} for index, row in document_data.iterrows(): - if n > 0 and n % 13 == 0: + if n > 0 and n % 9 == 0: logger.info("Waiting for 60 seconds") time.sleep(60) question = row["question"] From b7ce84eafdfb62c780db135d7d4c3c54e5d38c10 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:45:28 +0100 Subject: [PATCH 013/120] Add more wait --- benchmark/gemini.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/gemini.py b/benchmark/gemini.py index d6a72ca..c0159d8 100644 --- a/benchmark/gemini.py +++ b/benchmark/gemini.py @@ -64,8 +64,8 @@ def 
gemini_process_document(document_file, document_data): sections = {} for index, row in document_data.iterrows(): if n > 0 and n % 9 == 0: - logger.info("Waiting for 60 seconds") - time.sleep(60) + logger.info("Waiting for 90 seconds") + time.sleep(90) question = row["question"] logger.debug(f"Question: {question}") chat_session = model.start_chat( From 6fc48fee6721ffd77704d7e3f92331c0374d369c Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:49:31 +0100 Subject: [PATCH 014/120] Fix return --- benchmark/gemini.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmark/gemini.py b/benchmark/gemini.py index c0159d8..c5e0bcf 100644 --- a/benchmark/gemini.py +++ b/benchmark/gemini.py @@ -64,8 +64,8 @@ def gemini_process_document(document_file, document_data): sections = {} for index, row in document_data.iterrows(): if n > 0 and n % 9 == 0: - logger.info("Waiting for 90 seconds") - time.sleep(90) + logger.info("Waiting for 60 seconds") + time.sleep(60) question = row["question"] logger.debug(f"Question: {question}") chat_session = model.start_chat( @@ -86,3 +86,4 @@ def gemini_process_document(document_file, document_data): answers[index] = response_json["answer"] sections[index] = response_json["section"] n += 1 + return answers, sections From 8929e9e99a056b3978f2888006f7ba466b991086 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:57:24 +0100 Subject: [PATCH 015/120] Fix URLs --- benchmark/structured_qa.csv | 42 ++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 227c99d..3547eb1 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -11,31 +11,31 @@ https://arxiv.org/pdf/1706.03762,5 Training,Was optimizer was used? 
-A: AdamW -B https://arxiv.org/pdf/1706.03762,5 Training,How many warmup steps were used,,4000, https://arxiv.org/pdf/1706.03762,5 Training,What was the dropout rate used for the base model,,0.1, https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,How many layers are in the toy model (y = x^2)?,,3, -https://arxiv.org/pdf/2210.05190,2.1 Fully Connected Networks,Does the model use Sigmoid activation function?,0,, -https://arxiv.org/pdf/2210.05191,3 Experimental Results,How many parameters are in the y = x^2 toy model tree?,,14, -https://arxiv.org/pdf/2210.05192,2.4 Recurrent Networks,Can recurrent networks also be converted to decision trees?,1,, -https://arxiv.org/pdf/2210.05193,3 Experimental Results,How many layers are in the half-moon neural network?,,3, -https://arxiv.org/pdf/2210.05194,3 Experimental Results,"What is the main computational advantage of decision trees? A: Less storage memory, B: Fewer operations, C: Lower accuracy",,,B +https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,Does the model use Sigmoid activation function?,0,, +https://arxiv.org/pdf/2210.05189,3 Experimental Results,How many parameters are in the y = x^2 toy model tree?,,14, +https://arxiv.org/pdf/2210.05189,2.4 Recurrent Networks,Can recurrent networks also be converted to decision trees?,1,, +https://arxiv.org/pdf/2210.05189,3 Experimental Results,How many layers are in the half-moon neural network?,,3, +https://arxiv.org/pdf/2210.05189,3 Experimental Results,"What is the main computational advantage of decision trees? A: Less storage memory, B: Fewer operations, C: Lower accuracy",,,B https://arxiv.org/pdf/2106.09685v2.pdf,4 Our Method,Does LoRA work with any neural network containing dense layers?,1,, https://arxiv.org/pdf/2106.09685v2.pdf,5.5 Scaling Up to GPT-3,"How much memory is saved when training GPT-3 175B with LoRA compared to full fine-tuning? 
A: 850GB, B: 100GB, C: 5GB",,,A https://arxiv.org/pdf/2106.09685v2.pdf,Abstract,"By how much can LoRA reduce GPU memory requirements during training? A: 10x, B: 5x, C: 3x",,,C https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,"In billions, how many trainable parameters does GPT-3 have?",,175, https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,Does LoRA introduce additional inference latency compared to full fine-tuning?,0,, https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Prohibited AI Practices (Article 5),"Which type of AI systems are banned by the AI Act? (A) High-risk systems, (B) Manipulative systems, (C) Real-time biometric systems in public spaces",,,C -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401690,Requirements for High-Risk AI Systems (Article 10),"what is a requirement for datasets used in high-risk AI systems? (A) Exclusively open-source datasets, (B) Datasets ensuring quality and diversity, (C) Datasets not exceeding 1 GB in size",,,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401691,Classification rules (article 51),"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Requirements for High-Risk AI Systems (Article 10),"what is a requirement for datasets used in high-risk AI systems? (A) Exclusively open-source datasets, (B) Datasets ensuring quality and diversity, (C) Datasets not exceeding 1 GB in size",,,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Classification rules (article 51),"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
A) 10^15, B) 10^20, C) 10^25",,,C -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401692,"TRANSPARENCY OBLIGATIONS FOR PROVIDERS AND DEPLOYERS OF CERTAIN AI SYSTEMS +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,"TRANSPARENCY OBLIGATIONS FOR PROVIDERS AND DEPLOYERS OF CERTAIN AI SYSTEMS (Article 50)","What should providers of AI systems that generate synthetic content ensure? A) That the content is not marked in any way. B) That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. C) That there is no way to detect that the content is synthetic.",,,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401693,Sharing of information on serious incidents (article 73),How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? A) 3 days B) 7 days C) 14 days,,,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401694,Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes (Article 60),"What is the maximum duration of testing in real-world conditions? a) 3 months b) 6 months, with a possible extension of an additional 6 months. c) 12 months",,,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401695,Penalties (Article 99),"What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? A) 7,500,000 EUR or 1% of annual turnover, whichever is higher. B) 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher C) 10,000,000 EUR or 5% of annual turnover, whichever is higher",,,A -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401696,Code of practice (article 56),By what date should codes of practice be ready? 
a) 2 May 2025 b) 2 May 2024 c) 2 August 2025,,,A -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401697,"Compliant AI systems which present a risk (article 82) +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Sharing of information on serious incidents (article 73),How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? A) 3 days B) 7 days C) 14 days,,,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes (Article 60),"What is the maximum duration of testing in real-world conditions? a) 3 months b) 6 months, with a possible extension of an additional 6 months. c) 12 months",,,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Penalties (Article 99),"What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? A) 7,500,000 EUR or 1% of annual turnover, whichever is higher. B) 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher C) 10,000,000 EUR or 5% of annual turnover, whichever is higher",,,A +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Code of practice (article 56),By what date should codes of practice be ready? a) 2 May 2025 b) 2 May 2024 c) 2 August 2025,,,A +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,"Compliant AI systems which present a risk (article 82) ",What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
a) 1 month b) 2 months c) Immediately,,,C -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401698,EU declaration of conformity (article 47),"How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? a) 5 years b) 10 years c) 15 years",,,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401699,Establishment and structure of the European Artificial Intelligence Board (article 65),"How long is the term of office for a Member State representative on the European Artificial Intelligence Board? a) 2 years, renewable once b) 3 years, renewable once c) 4 years, renewable once",,,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,EU declaration of conformity (article 47),"How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? a) 5 years b) 10 years c) 15 years",,,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Establishment and structure of the European Artificial Intelligence Board (article 65),"How long is the term of office for a Member State representative on the European Artificial Intelligence Board? a) 2 years, renewable once b) 3 years, renewable once c) 4 years, renewable once",,,B https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,"According to the guide, what is the typical license used to grant reuse rights with libre open access? 
a) GNU General Public License b) Creative Commons license c) MIT license",,,B https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,"how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? a) Over 10,000 b) Over 20,000 c) Exactly 30,000",,,A https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 2 Benefits of Open Access,what is the term of office for members of the advisory board of the Authors Alliance? a) The source does not specify a term of office for the advisory board. b) 2 years c) 4 years,,,A @@ -44,9 +44,9 @@ https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Allian https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 3 Open Access Policies,In what year did the Bull and Melinda Gates foundation implement an open access policy?,,2015, https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,Are Gold Open Access and Green Open Access mutually exclusive.,0,, https://arxiv.org/pdf/2201.11903,3 Arithmetic Reasoning,Is Arithmetic reasoning is a task that language models often find very easy?,0,, -https://arxiv.org/pdf/2201.11904,3.1 Experimental Setup,How many large language models were evaluated?,,5, -https://arxiv.org/pdf/2201.11905,3.1 Experimental Setup,How many benchmarks were used to evaluate arithmetic reasoning?,,5, -https://arxiv.org/pdf/2201.11906,5 Symbolic Reasoning,Is symbolic reasoning usually simple for humans but challenging for language models?,1,, -https://arxiv.org/pdf/2201.11907,3.4 Robustness of Chain of Thought,How many annotators provided independent chains of thought?,,3, 
-https://arxiv.org/pdf/2201.11908,3.2 Results,How many random samples for examined to understand model errors?,,50, -https://arxiv.org/pdf/2201.11909,5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? A: Coin Flip, B: Tower of Hanoi, C: Chess puzzles",,,A +https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many large language models were evaluated?,,5, +https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many benchmarks were used to evaluate arithmetic reasoning?,,5, +https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,Is symbolic reasoning usually simple for humans but challenging for language models?,1,, +https://arxiv.org/pdf/2201.11903,3.4 Robustness of Chain of Thought,How many annotators provided independent chains of thought?,,3, +https://arxiv.org/pdf/2201.11903,3.2 Results,How many random samples for examined to understand model errors?,,50, +https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? 
A: Coin Flip, B: Tower of Hanoi, C: Chess puzzles",,,A From 4a9e75ee5aa1f33de7b3df3db328405998ea9d5b Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:57:56 +0100 Subject: [PATCH 016/120] Update download name --- benchmark/run_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index 2a26e81..ca42d63 100644 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -26,7 +26,7 @@ def run_benchmark(input_data: str, output_file: str, model: str): for document_link, document_data in data.groupby("document"): logger.info(f"Downloading document {document_link}") - downloaded_document = Path(f"{Path(document_link).stem}.pdf") + downloaded_document = Path(f"{Path(document_link).name}.pdf") download_document(document_link, downloaded_document) if model == "gemini": From 41ffc23f88eb7d57201a81b9cf937317c610fdec Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 14:59:40 +0100 Subject: [PATCH 017/120] Update --- benchmark/gemini.py | 4 ++-- benchmark/structured_qa.csv | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmark/gemini.py b/benchmark/gemini.py index c5e0bcf..f04a244 100644 --- a/benchmark/gemini.py +++ b/benchmark/gemini.py @@ -12,7 +12,7 @@ In `"section"`, you will return the name of the section where you found the answer. In `"answer"`, you will return the answer one of the following JSON: - Yes/No (for boolean questions) -Is the model an LLM? +Is the model an LLM? { "section": "1. 
Introduction", "answer": "No" @@ -47,7 +47,7 @@ def gemini_process_document(document_file, document_data): logger.info("Creating model") model = genai.GenerativeModel( - model_name="gemini-2.0-flash-exp", + model_name="gemini-2.0-flash-exp", generation_config={ "temperature": 1, "top_p": 0.95, diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 3547eb1..4ed6a72 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -3,8 +3,8 @@ https://arxiv.org/pdf/1706.03762,3 Model Architecture,What type of architecture https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the encoder,,6, https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the decoder,,6, https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many parallel attention heads are used,,8, -https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use learned embeddings for the input and output tokens,1,, -https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the model use learned positional embeddings,0,, +https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the final model use learned embeddings for the input and output tokens,1,, +https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the final model use learned positional embeddings,0,, https://arxiv.org/pdf/1706.03762,5 Training,How many GPUs were used for training,,8, https://arxiv.org/pdf/1706.03762,5 Training,Was type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4,,,B https://arxiv.org/pdf/1706.03762,5 Training,Was optimizer was used? 
-A: AdamW -B: Adam -C: SGD,,,A From 4390852c9d7679ae832822e2f6728acd94677c7f Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 17:13:19 +0100 Subject: [PATCH 018/120] Update --- benchmark/structured_qa.csv | 100 +++++++++++++++++------------------- 1 file changed, 48 insertions(+), 52 deletions(-) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 4ed6a72..28d322b 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -1,52 +1,48 @@ -document,section,question,bool_answer,num_answer,multi_choice_answer -https://arxiv.org/pdf/1706.03762,3 Model Architecture,What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder,,,C -https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the encoder,,6, -https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many layers compose the decoder,,6, -https://arxiv.org/pdf/1706.03762,3 Model Architecture,How many parallel attention heads are used,,8, -https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the final model use learned embeddings for the input and output tokens,1,, -https://arxiv.org/pdf/1706.03762,3 Model Architecture,Does the final model use learned positional embeddings,0,, -https://arxiv.org/pdf/1706.03762,5 Training,How many GPUs were used for training,,8, -https://arxiv.org/pdf/1706.03762,5 Training,Was type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4,,,B -https://arxiv.org/pdf/1706.03762,5 Training,Was optimizer was used? 
-A: AdamW -B: Adam -C: SGD,,,A -https://arxiv.org/pdf/1706.03762,5 Training,How many warmup steps were used,,4000, -https://arxiv.org/pdf/1706.03762,5 Training,What was the dropout rate used for the base model,,0.1, -https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,How many layers are in the toy model (y = x^2)?,,3, -https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,Does the model use Sigmoid activation function?,0,, -https://arxiv.org/pdf/2210.05189,3 Experimental Results,How many parameters are in the y = x^2 toy model tree?,,14, -https://arxiv.org/pdf/2210.05189,2.4 Recurrent Networks,Can recurrent networks also be converted to decision trees?,1,, -https://arxiv.org/pdf/2210.05189,3 Experimental Results,How many layers are in the half-moon neural network?,,3, -https://arxiv.org/pdf/2210.05189,3 Experimental Results,"What is the main computational advantage of decision trees? A: Less storage memory, B: Fewer operations, C: Lower accuracy",,,B -https://arxiv.org/pdf/2106.09685v2.pdf,4 Our Method,Does LoRA work with any neural network containing dense layers?,1,, -https://arxiv.org/pdf/2106.09685v2.pdf,5.5 Scaling Up to GPT-3,"How much memory is saved when training GPT-3 175B with LoRA compared to full fine-tuning? A: 850GB, B: 100GB, C: 5GB",,,A -https://arxiv.org/pdf/2106.09685v2.pdf,Abstract,"By how much can LoRA reduce GPU memory requirements during training? A: 10x, B: 5x, C: 3x",,,C -https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,"In billions, how many trainable parameters does GPT-3 have?",,175, -https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,Does LoRA introduce additional inference latency compared to full fine-tuning?,0,, -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Prohibited AI Practices (Article 5),"Which type of AI systems are banned by the AI Act? 
(A) High-risk systems, (B) Manipulative systems, (C) Real-time biometric systems in public spaces",,,C -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Requirements for High-Risk AI Systems (Article 10),"what is a requirement for datasets used in high-risk AI systems? (A) Exclusively open-source datasets, (B) Datasets ensuring quality and diversity, (C) Datasets not exceeding 1 GB in size",,,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Classification rules (article 51),"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A) 10^15, B) 10^20, C) 10^25",,,C -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,"TRANSPARENCY OBLIGATIONS FOR PROVIDERS AND DEPLOYERS OF CERTAIN AI SYSTEMS -(Article 50)","What should providers of AI systems that generate synthetic content ensure? -A) That the content is not marked in any way. B) That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. C) That there is no way to detect that the content is synthetic.",,,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Sharing of information on serious incidents (article 73),How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? A) 3 days B) 7 days C) 14 days,,,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes (Article 60),"What is the maximum duration of testing in real-world conditions? a) 3 months b) 6 months, with a possible extension of an additional 6 months. 
c) 12 months",,,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Penalties (Article 99),"What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? A) 7,500,000 EUR or 1% of annual turnover, whichever is higher. B) 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher C) 10,000,000 EUR or 5% of annual turnover, whichever is higher",,,A -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Code of practice (article 56),By what date should codes of practice be ready? a) 2 May 2025 b) 2 May 2024 c) 2 August 2025,,,A -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,"Compliant AI systems which present a risk (article 82) -",What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? a) 1 month b) 2 months c) Immediately,,,C -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,EU declaration of conformity (article 47),"How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? a) 5 years b) 10 years c) 15 years",,,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Establishment and structure of the European Artificial Intelligence Board (article 65),"How long is the term of office for a Member State representative on the European Artificial Intelligence Board? a) 2 years, renewable once b) 3 years, renewable once c) 4 years, renewable once",,,B -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,"According to the guide, what is the typical license used to grant reuse rights with libre open access? 
a) GNU General Public License b) Creative Commons license c) MIT license",,,B -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,"how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? a) Over 10,000 b) Over 20,000 c) Exactly 30,000",,,A -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 2 Benefits of Open Access,what is the term of office for members of the advisory board of the Authors Alliance? a) The source does not specify a term of office for the advisory board. b) 2 years c) 4 years,,,A -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,Does open access eliminate price barriers?,1,, -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chatper 1 What is this guide and who is it for,Are publication fees required for all open access journals?,0,, -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 3 Open Access Policies,In what year did the Bull and Melinda Gates foundation implement an open access policy?,,2015, -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,Are Gold Open Access and Green Open Access mutually exclusive.,0,, -https://arxiv.org/pdf/2201.11903,3 Arithmetic Reasoning,Is Arithmetic reasoning is a task that language models often find very easy?,0,, -https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many large language models were evaluated?,,5, -https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many benchmarks 
were used to evaluate arithmetic reasoning?,,5, -https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,Is symbolic reasoning usually simple for humans but challenging for language models?,1,, -https://arxiv.org/pdf/2201.11903,3.4 Robustness of Chain of Thought,How many annotators provided independent chains of thought?,,3, -https://arxiv.org/pdf/2201.11903,3.2 Results,How many random samples for examined to understand model errors?,,50, -https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? A: Coin Flip, B: Tower of Hanoi, C: Chess puzzles",,,A +document,section,question,answer +https://arxiv.org/pdf/1706.03762,3 Model Architecture,"What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder",C +https://arxiv.org/pdf/1706.03762,3 Model Architecture,"How many layers compose the encoder?",6 +https://arxiv.org/pdf/1706.03762,3 Model Architecture,"How many layers compose the decoder?",6 +https://arxiv.org/pdf/1706.03762,3 Model Architecture,"How many parallel attention heads are used?",8 +https://arxiv.org/pdf/1706.03762,3 Model Architecture,"Does the final model use learned embeddings for the input and output tokens?",Yes +https://arxiv.org/pdf/1706.03762,3 Model Architecture,"Does the final model use learned positional embeddings?",No +https://arxiv.org/pdf/1706.03762,5 Training,"How many GPUs were used for training?",8 +https://arxiv.org/pdf/1706.03762,5 Training,"What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4",B +https://arxiv.org/pdf/1706.03762,5 Training,"What optimizer was used for trainin? 
-A: AdamW -B: Adam -C: SGD",A +https://arxiv.org/pdf/1706.03762,5 Training,"How many warmup steps were used?",4000 +https://arxiv.org/pdf/1706.03762,5 Training,"What was the dropout rate used for the base model?",0.1 +https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,"How many layers are in the toy model (y = x^2)?",3 +https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,"Does the model use Sigmoid activation function?",No +https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many parameters are in the y = x^2 toy model tree?",14 +https://arxiv.org/pdf/2210.05189,2.4 Recurrent Networks,"Can recurrent networks also be converted to decision trees?",Yes +https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many layers are in the half-moon neural network?",3 +https://arxiv.org/pdf/2210.05189,3 Experimental Results,"What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy",B +https://arxiv.org/pdf/2106.09685v2.pdf,4 Our Method,Does LoRA work with any neural network containing dense layers?,Yes +https://arxiv.org/pdf/2106.09685v2.pdf,5.5 Scaling Up to GPT-3,"How much memory is saved when training GPT-3 175B with LoRA compared to full fine-tuning? -A: 850GB, -B: 100GB, -C: 5GB",A +https://arxiv.org/pdf/2106.09685v2.pdf,Abstract,"By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x",C +https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,"In billions, how many trainable parameters does GPT-3 have?",175 +https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,Does LoRA introduce additional inference latency compared to full fine-tuning?,No +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Prohibited AI Practices (Article 5),"Which type of AI systems are banned by the AI Act? 
-A: High-risk systems, -B: Manipulative systems, -C: Real-time biometric systems in public spaces",C +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Requirements for High-Risk AI Systems (Article 10),"what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size",B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Classification rules (article 51),"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25",C +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,"TRANSPARENCY OBLIGATIONS FOR PROVIDERS AND DEPLOYERS OF CERTAIN AI SYSTEMS(Article 50)","What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.",B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Sharing of information on serious incidents (article 73),How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes (Article 60),"What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. 
-C: 12 months",B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Penalties (Article 99),"What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher",A +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Code of practice (article 56),By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025,A +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,"Compliant AI systems which present a risk (article 82)",What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? -A: 1 month -B: 2 months -C: Immediately,C +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,EU declaration of conformity (article 47),"How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years",B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Establishment and structure of the European Artificial Intelligence Board (article 65),"How long is the term of office for a Member State representative on the European Artificial Intelligence Board? -A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once",B +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,"According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license",B +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,"how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000",A +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 2 Benefits of Open Access,what is the term of office for members of the advisory board of the Authors Alliance? -A: The source does not specify a term of office for the advisory board. -B: 2 years -C: 4 years,A +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,Does open access eliminate price barriers?,Yes +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chatper 1 What is this guide and who is it for,Are publication fees required for all open access journals?,No +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 3 Open Access Policies,In what year did the Bull and Melinda Gates foundation implement an open access policy?,2015 +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,Are Gold Open Access and Green Open Access mutually exclusive.,No +https://arxiv.org/pdf/2201.11903,3 Arithmetic Reasoning,Is Arithmetic reasoning is a task that language models often find very easy?,No +https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many large language models were evaluated?,5 +https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many benchmarks 
were used to evaluate arithmetic reasoning?,5 +https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,Is symbolic reasoning usually simple for humans but challenging for language models?,Yes +https://arxiv.org/pdf/2201.11903,3.4 Robustness of Chain of Thought,How many annotators provided independent chains of thought?,3 +https://arxiv.org/pdf/2201.11903,3.2 Results,How many random samples for examined to understand model errors?,50 +https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles",A From 68621eb4aa71327a8bacaa73aefb31d7c946a781 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 17:35:23 +0100 Subject: [PATCH 019/120] Update with upper --- benchmark/run_benchmark.py | 2 +- benchmark/structured_qa.csv | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index ca42d63..6ac681e 100644 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -35,7 +35,7 @@ def run_benchmark(input_data: str, output_file: str, model: str): ) for index in document_data.index: - data.loc[index, "pred_answer"] = answers[index] + data.loc[index, "pred_answer"] = answers[index].upper() data.loc[index, "pred_section"] = sections[index] data.to_csv(output_file) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 28d322b..02db7af 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -3,24 +3,24 @@ https://arxiv.org/pdf/1706.03762,3 Model Architecture,"What type of architecture https://arxiv.org/pdf/1706.03762,3 Model Architecture,"How many layers compose the encoder?",6 https://arxiv.org/pdf/1706.03762,3 Model Architecture,"How many layers compose the decoder?",6 https://arxiv.org/pdf/1706.03762,3 Model Architecture,"How many parallel attention heads are used?",8 -https://arxiv.org/pdf/1706.03762,3 Model 
Architecture,"Does the final model use learned embeddings for the input and output tokens?",Yes -https://arxiv.org/pdf/1706.03762,3 Model Architecture,"Does the final model use learned positional embeddings?",No +https://arxiv.org/pdf/1706.03762,3 Model Architecture,"Does the final model use learned embeddings for the input and output tokens?",YES +https://arxiv.org/pdf/1706.03762,3 Model Architecture,"Does the final model use learned positional embeddings?",NO https://arxiv.org/pdf/1706.03762,5 Training,"How many GPUs were used for training?",8 https://arxiv.org/pdf/1706.03762,5 Training,"What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4",B -https://arxiv.org/pdf/1706.03762,5 Training,"What optimizer was used for trainin? -A: AdamW -B: Adam -C: SGD",A +https://arxiv.org/pdf/1706.03762,5 Training,"What optimizer was used for training? -A: AdamW -B: Adam -C: SGD",A https://arxiv.org/pdf/1706.03762,5 Training,"How many warmup steps were used?",4000 https://arxiv.org/pdf/1706.03762,5 Training,"What was the dropout rate used for the base model?",0.1 https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,"How many layers are in the toy model (y = x^2)?",3 -https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,"Does the model use Sigmoid activation function?",No +https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,"Does the model use Sigmoid activation function?",NO https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many parameters are in the y = x^2 toy model tree?",14 -https://arxiv.org/pdf/2210.05189,2.4 Recurrent Networks,"Can recurrent networks also be converted to decision trees?",Yes +https://arxiv.org/pdf/2210.05189,2.4 Recurrent Networks,"Can recurrent networks also be converted to decision trees?",YES https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many layers are in the half-moon neural network?",3 https://arxiv.org/pdf/2210.05189,3 Experimental Results,"What is the main 
computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy",B -https://arxiv.org/pdf/2106.09685v2.pdf,4 Our Method,Does LoRA work with any neural network containing dense layers?,Yes -https://arxiv.org/pdf/2106.09685v2.pdf,5.5 Scaling Up to GPT-3,"How much memory is saved when training GPT-3 175B with LoRA compared to full fine-tuning? -A: 850GB, -B: 100GB, -C: 5GB",A +https://arxiv.org/pdf/2106.09685v2.pdf,4 Our Method,Does LoRA work with any neural network containing dense layers?,YES +https://arxiv.org/pdf/2106.09685v2.pdf,5.5 Scaling Up to GPT-3,"How much memory is saved (in GB) when training GPT-3 175B with LoRA compared to full fine-tuning?",850 https://arxiv.org/pdf/2106.09685v2.pdf,Abstract,"By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x",C https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,"In billions, how many trainable parameters does GPT-3 have?",175 -https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,Does LoRA introduce additional inference latency compared to full fine-tuning?,No +https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,Does LoRA introduce additional inference latency compared to full fine-tuning?,NO https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Prohibited AI Practices (Article 5),"Which type of AI systems are banned by the AI Act? -A: High-risk systems, -B: Manipulative systems, -C: Real-time biometric systems in public spaces",C https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Requirements for High-Risk AI Systems (Article 10),"what is a requirement for datasets used in high-risk AI systems? 
-A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size",B https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Classification rules (article 51),"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25",C @@ -35,14 +35,14 @@ https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Establish https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,"According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license",B https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,"how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000",A https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 2 Benefits of Open Access,what is the term of office for members of the advisory board of the Authors Alliance? -A: The source does not specify a term of office for the advisory board. 
-B: 2 years -C: 4 years,A -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,Does open access eliminate price barriers?,Yes -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chatper 1 What is this guide and who is it for,Are publication fees required for all open access journals?,No +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,Does open access eliminate price barriers?,YES +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chatper 1 What is this guide and who is it for,Are publication fees required for all open access journals?,NO https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 3 Open Access Policies,In what year did the Bull and Melinda Gates foundation implement an open access policy?,2015 -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,Are Gold Open Access and Green Open Access mutually exclusive.,No -https://arxiv.org/pdf/2201.11903,3 Arithmetic Reasoning,Is Arithmetic reasoning is a task that language models often find very easy?,No +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,Are Gold Open Access and Green Open Access mutually exclusive.,NO +https://arxiv.org/pdf/2201.11903,3 Arithmetic Reasoning,Is Arithmetic reasoning is a task that language models often find very easy?,NO https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many large language models were evaluated?,5 
https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many benchmarks were used to evaluate arithmetic reasoning?,5 -https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,Is symbolic reasoning usually simple for humans but challenging for language models?,Yes +https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,Is symbolic reasoning usually simple for humans but challenging for language models?,YES https://arxiv.org/pdf/2201.11903,3.4 Robustness of Chain of Thought,How many annotators provided independent chains of thought?,3 https://arxiv.org/pdf/2201.11903,3.2 Results,How many random samples for examined to understand model errors?,50 https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles",A From 422e5d5ab5a06013120beaae816a821369d241df Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 17:37:52 +0100 Subject: [PATCH 020/120] Cast to str --- benchmark/run_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index 6ac681e..e086857 100644 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -35,7 +35,7 @@ def run_benchmark(input_data: str, output_file: str, model: str): ) for index in document_data.index: - data.loc[index, "pred_answer"] = answers[index].upper() + data.loc[index, "pred_answer"] = str(answers[index]).upper() data.loc[index, "pred_section"] = sections[index] data.to_csv(output_file) From 304097841f672b1af343f50ab294d74fcc9f61b3 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 21:02:40 +0100 Subject: [PATCH 021/120] Extend --- benchmark/structured_qa.csv | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 02db7af..c41c84e 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -7,7 +7,7 @@ 
https://arxiv.org/pdf/1706.03762,3 Model Architecture,"Does the final model use https://arxiv.org/pdf/1706.03762,3 Model Architecture,"Does the final model use learned positional embeddings?",NO https://arxiv.org/pdf/1706.03762,5 Training,"How many GPUs were used for training?",8 https://arxiv.org/pdf/1706.03762,5 Training,"What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4",B -https://arxiv.org/pdf/1706.03762,5 Training,"What optimizer was used for training? -A: AdamW -B: Adam -C: SGD",A +https://arxiv.org/pdf/1706.03762,5 Training,"What optimizer was used for training? -A: AdamW -B: Adam -C: SGD",B https://arxiv.org/pdf/1706.03762,5 Training,"How many warmup steps were used?",4000 https://arxiv.org/pdf/1706.03762,5 Training,"What was the dropout rate used for the base model?",0.1 https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,"How many layers are in the toy model (y = x^2)?",3 @@ -46,3 +46,18 @@ https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,Is symbolic reasoning usua https://arxiv.org/pdf/2201.11903,3.4 Robustness of Chain of Thought,How many annotators provided independent chains of thought?,3 https://arxiv.org/pdf/2201.11903,3.2 Results,How many random samples for examined to understand model errors?,50 https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles",A +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,OVERVIEW AND GOAL,How many chapters does the game last?,3 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,OVERVIEW AND GOAL,How many victory conditions are there?,3 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,OVERVIEW AND GOALEND OF THE GAME,How many different races are there?,6 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,Which player begins the game? -A: Sauron -B: The Fellowship -C: Other,A +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,Can you take a Chapter card and a Landmark tile on your same turn?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,How many goins does a player take when discarding a card during Chapter 3?,3 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,After taking a landmark tile, do you reveal a new tile and the end of your turn?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE COSTS,Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE COSTS,If a player is missing 2 skill symbols, how many coins must they pay to the reserve?,2 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,Can you use a symbol more than once per turn?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue,B +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3,C +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CONQUERING MIDDLE-EARTH,If you place or move an unit and an enemy fortress is present, does it trigger a conflict?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,END OF THE GAME,Can the game end in a tie?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,END OF THE GAME,In how many regions do you need to be present to win the game?,7 From bc0d8ce658ecb85425b52f7785c437b88495b317 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 21:10:11 +0100 Subject: [PATCH 022/120] Add benchmark --- benchmark/benchmark.ipynb | 115 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb index e69de29..451a37b 100644 --- a/benchmark/benchmark.ipynb +++ b/benchmark/benchmark.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## GPU Check" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, you'll need to enable GPUs for the notebook:\n", + "\n", + "- Navigate to `Edit`→`Notebook Settings`\n", + "- Select T4 GPU from the Hardware Accelerator section\n", + "- Click `Save` and accept.\n", + "\n", + "Next, we'll confirm that we can connect to the 
GPU:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "if not torch.cuda.is_available():\n", + " raise RuntimeError(\"GPU not available\")\n", + "else:\n", + " print(\"GPU is available!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!git clone --branch 5-add-benchmark --single-branch https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -e structured-qa" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!export GEMINI_API_KEY=\"\" python structured-qa/benchmark/run_benchmark.py --input_data structured-qa/benchmark/structured_qa.csv --output_file results.csv --model \"gemini\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 03e0e60c1954b18daf6b8087578b70c7602edf88 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 21:14:51 +0100 Subject: [PATCH 023/120] Fix --- benchmark/structured_qa.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index c41c84e..272c3a9 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -52,7 +52,7 @@ 
https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_R https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,Which player begins the game? -A: Sauron -B: The Fellowship -C: Other,A https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,Can you take a Chapter card and a Landmark tile on your same turn?,NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,How many goins does a player take when discarding a card during Chapter 3?,3 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,After taking a landmark tile, do you reveal a new tile and the end of your turn?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,"After taking a landmark tile, do you reveal a new tile and the end of your turn?",NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE COSTS,Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?,YES https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE COSTS,If a player is missing 2 skill symbols, how many coins must they pay to the reserve?,2 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,Can you use a symbol more than once per turn?,NO From c19738e5a856ec79320812966e79ba417fce1c75 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 20 Jan 2025 21:15:39 +0100 Subject: [PATCH 024/120] fix --- benchmark/structured_qa.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 272c3a9..f14988c 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -54,10 +54,10 @@ 
https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_R https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,How many goins does a player take when discarding a card during Chapter 3?,3 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,"After taking a landmark tile, do you reveal a new tile and the end of your turn?",NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE COSTS,Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?,YES -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE COSTS,If a player is missing 2 skill symbols, how many coins must they pay to the reserve?,2 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE COSTS,"If a player is missing 2 skill symbols, how many coins must they pay to the reserve?",2 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,Can you use a symbol more than once per turn?,NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue,B https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3,C -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CONQUERING MIDDLE-EARTH,If you place or move an unit and an enemy fortress is present, does it trigger a conflict?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CONQUERING MIDDLE-EARTH,"If you place or move an unit and an enemy fortress is present, does it trigger a conflict?",NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,END OF THE GAME,Can the game end in a tie?,YES https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,END OF THE GAME,In how many regions do you need to be present to win the game?,7 From 3cd7b24a906a79351e952a772abbc0b40ad048d2 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 21 Jan 2025 12:15:13 +0100 Subject: [PATCH 025/120] Drop export --- benchmark/benchmark.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb index 451a37b..5743cb8 100644 --- a/benchmark/benchmark.ipynb +++ b/benchmark/benchmark.ipynb @@ -95,7 +95,7 @@ "metadata": {}, "outputs": [], "source": [ - "!export GEMINI_API_KEY=\"\" python structured-qa/benchmark/run_benchmark.py --input_data structured-qa/benchmark/structured_qa.csv --output_file results.csv --model \"gemini\"" + "!GEMINI_API_KEY=\"\" python structured-qa/benchmark/run_benchmark.py --input_data structured-qa/benchmark/structured_qa.csv --output_file results.csv --model \"gemini\"" ] } ], From 22df32b94cba162bafc4644b10879ffdaf3ad51f Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 21 Jan 2025 14:30:50 +0100 Subject: [PATCH 026/120] Updates --- benchmark/benchmark.ipynb | 36 ++++++++++++++++++++++++++++++++++++ benchmark/gemini.py | 11 ++++++++--- benchmark/structured_qa.csv | 9 +++++++++ benchmark/workflow.py | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 3 
deletions(-) create mode 100644 benchmark/workflow.py diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb index 5743cb8..b8d0083 100644 --- a/benchmark/benchmark.ipynb +++ b/benchmark/benchmark.ipynb @@ -97,6 +97,42 @@ "source": [ "!GEMINI_API_KEY=\"\" python structured-qa/benchmark/run_benchmark.py --input_data structured-qa/benchmark/structured_qa.csv --output_file results.csv --model \"gemini\"" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] } ], "metadata": { diff --git a/benchmark/gemini.py b/benchmark/gemini.py index f04a244..70dc927 100644 --- a/benchmark/gemini.py +++ b/benchmark/gemini.py @@ -35,7 +35,12 @@ """ -def gemini_process_document(document_file, document_data): +def gemini_process_document( + document_file, + document_data, + model_name: str = "gemini-2.0-flash-exp", + system_prompt: str = SYSTEM_PROMPT, +): genai.configure(api_key=os.environ["GEMINI_API_KEY"]) logger.info("Uploading file") @@ -47,7 +52,7 @@ def gemini_process_document(document_file, document_data): logger.info("Creating model") model = genai.GenerativeModel( - model_name="gemini-2.0-flash-exp", + model_name=model_name, generation_config={ "temperature": 1, "top_p": 0.95, @@ -55,7 +60,7 @@ def gemini_process_document(document_file, document_data): "max_output_tokens": 8192, "response_mime_type": "application/json", }, - system_instruction=SYSTEM_PROMPT, + 
system_instruction=system_prompt, ) logger.info("Predicting") diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index f14988c..212b682 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -61,3 +61,12 @@ https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_R https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CONQUERING MIDDLE-EARTH,"If you place or move an unit and an enemy fortress is present, does it trigger a conflict?",NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,END OF THE GAME,Can the game end in a tie?,YES https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,END OF THE GAME,In how many regions do you need to be present to win the game?,7 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOOKOUT PHASE,What is the maximum number of cards a player may acquire during the lookout phase?,4 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOOKOUT PHASE,Is there a limit to the number of cards a player may have in their hand?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,ACTION PHASE,"Can you raid the locations of a player that has passed during the action phase?",NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,EXPEDITION PHASE,Can players conquer and pillage the same island during the expedition phase?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,ACTION PHASEGAME END,How many points in the scoreboard must be reached during the Action phase to trigger the final round?,25 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,CLEANUP PHASE,Is there a cleanup phase in the final round?,NO 
+https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,BUILD A LOCATION,How many victory points are granted by a built Field Location card that work as an upgrade?,1 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,ACTIONS,Can you use the raid action without a Raze token?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,GAME END,Can the game end in a tie?,YES diff --git a/benchmark/workflow.py b/benchmark/workflow.py new file mode 100644 index 0000000..0aa792f --- /dev/null +++ b/benchmark/workflow.py @@ -0,0 +1,36 @@ +from loguru import logger + +from structured_qa.config import FIND_PROMPT, ANSWER_PROMPT +from structured_qa.model_loaders import load_llama_cpp_model +from structured_qa.preprocessing import document_to_sections_dir +from structured_qa.workflow import find_retrieve_answer + + +def workflow_process_document( + document_file, + document_data, + model_id: str = "bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf", + find_prompt: str = FIND_PROMPT, + answer_prompt: str = ANSWER_PROMPT, +): + logger.info("Creating model") + model = load_llama_cpp_model(model_id) + + logger.info("Splitting document into sections") + sections_dir = "sections" + document_to_sections_dir(document_file, sections_dir) + + logger.info("Predicting") + answers = {} + sections = {} + for index, row in document_data.iterrows(): + question = row["question"] + logger.debug(f"Question: {question}") + answer, sections_checked = find_retrieve_answer( + question, model, sections_dir, find_prompt, answer_prompt + ) + + answers[index] = answer + sections[index] = sections_checked[-1] + + return answers, sections From b35dc23b90c9d1b5755c7a07526fda8b075049cb Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 21 Jan 2025 14:32:05 +0100 Subject: [PATCH 027/120] Update default model --- benchmark/workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/benchmark/workflow.py b/benchmark/workflow.py index 0aa792f..5606b44 100644 --- a/benchmark/workflow.py +++ b/benchmark/workflow.py @@ -9,7 +9,7 @@ def workflow_process_document( document_file, document_data, - model_id: str = "bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf", + model_id: str = "bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf", find_prompt: str = FIND_PROMPT, answer_prompt: str = ANSWER_PROMPT, ): From 6cf13d7795824922b8a1697c9c00adaebf7ecd77 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 21 Jan 2025 14:33:03 +0100 Subject: [PATCH 028/120] Update --- benchmark/run_benchmark.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index e086857..0013167 100644 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -7,6 +7,7 @@ from gemini import gemini_process_document +from workflow import workflow_process_document def download_document(url, output_file): @@ -33,6 +34,10 @@ def run_benchmark(input_data: str, output_file: str, model: str): answers, sections = gemini_process_document( downloaded_document, document_data ) + elif model == "workflow": + answers, sections = workflow_process_document( + downloaded_document, document_data + ) for index in document_data.index: data.loc[index, "pred_answer"] = str(answers[index]).upper() From ad1ef9b5c98ed467ac4428b19e92498dad56b006 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 21 Jan 2025 14:45:18 +0100 Subject: [PATCH 029/120] Use info --- benchmark/workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/workflow.py b/benchmark/workflow.py index 5606b44..a6dab95 100644 --- a/benchmark/workflow.py +++ b/benchmark/workflow.py @@ -25,7 +25,7 @@ def workflow_process_document( sections = {} for index, row in document_data.iterrows(): question = row["question"] - logger.debug(f"Question: {question}") + logger.info(f"Question: {question}") answer, sections_checked = 
find_retrieve_answer( question, model, sections_dir, find_prompt, answer_prompt ) From f237b89bf9c7db481b3e899fbb962c56f000e364 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 21 Jan 2025 14:45:38 +0100 Subject: [PATCH 030/120] Update with None --- benchmark/workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/workflow.py b/benchmark/workflow.py index a6dab95..2eaa8cc 100644 --- a/benchmark/workflow.py +++ b/benchmark/workflow.py @@ -31,6 +31,6 @@ def workflow_process_document( ) answers[index] = answer - sections[index] = sections_checked[-1] + sections[index] = sections_checked[-1] if sections_checked else None return answers, sections From a34f4e2c022b57e24dce3e284c66fa583aeeb54d Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 21 Jan 2025 16:57:02 +0100 Subject: [PATCH 031/120] Add answer type --- benchmark/workflow.py | 35 ++++++++++++++++++++++++++++-- src/structured_qa/preprocessing.py | 1 - 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/benchmark/workflow.py b/benchmark/workflow.py index 2eaa8cc..6ee6d05 100644 --- a/benchmark/workflow.py +++ b/benchmark/workflow.py @@ -1,17 +1,37 @@ from loguru import logger -from structured_qa.config import FIND_PROMPT, ANSWER_PROMPT +from structured_qa.config import FIND_PROMPT from structured_qa.model_loaders import load_llama_cpp_model from structured_qa.preprocessing import document_to_sections_dir from structured_qa.workflow import find_retrieve_answer +ANSWER_WITH_TYPE_PROMPT = """ +You are a rigorous assistant answering questions. +You only answer based on the current information available. +You should only answer with ANSWER_TYPE. + +The current information available is: + +``` +{CURRENT_INFO} +``` + +If the current information available not enough to answer the question, +you must return the following message and nothing else: + +``` +I need more info. 
+``` +""" + + def workflow_process_document( document_file, document_data, model_id: str = "bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf", find_prompt: str = FIND_PROMPT, - answer_prompt: str = ANSWER_PROMPT, + answer_prompt: str = ANSWER_WITH_TYPE_PROMPT, ): logger.info("Creating model") model = load_llama_cpp_model(model_id) @@ -25,6 +45,17 @@ def workflow_process_document( sections = {} for index, row in document_data.iterrows(): question = row["question"] + try: + float(row["answer"]) + answer_type = "a single number" + except ValueError: + if row["answer"] in ("YES", "NO"): + answer_type = "YES or NO" + else: + answer_type = "a single letter" + + answer_prompt = answer_prompt.replace("ANSWER_TYPE", answer_type) + logger.info(f"Question: {question}") answer, sections_checked = find_retrieve_answer( question, model, sections_dir, find_prompt, answer_prompt diff --git a/src/structured_qa/preprocessing.py b/src/structured_qa/preprocessing.py index f85d701..a9d9c0c 100644 --- a/src/structured_qa/preprocessing.py +++ b/src/structured_qa/preprocessing.py @@ -92,7 +92,6 @@ def document_to_sections_dir(input_file: str, output_dir: str) -> list[str]: md_text, ) logger.success(f"Found {len(sections)} sections") - logger.info(f"Writing sections to {output_dir}") output_dir = Path(output_dir) output_dir.mkdir(exist_ok=True, parents=True) From 291e37697dc7afc213830130846f8d5a3f1e9dee Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 21 Jan 2025 18:27:59 +0100 Subject: [PATCH 032/120] Refactor --- benchmark/benchmark.ipynb | 24 +++++++++++++++---- src/structured_qa/benchmark/__init__.py | 0 .../benchmark/find_retrieve_answer.py | 5 ++-- .../structured_qa/benchmark}/gemini.py | 0 .../structured_qa/benchmark}/run_benchmark.py | 20 ++++++---------- 5 files changed, 29 insertions(+), 20 deletions(-) create mode 100644 src/structured_qa/benchmark/__init__.py rename benchmark/workflow.py => src/structured_qa/benchmark/find_retrieve_answer.py (94%) rename 
{benchmark => src/structured_qa/benchmark}/gemini.py (100%) rename {benchmark => src/structured_qa/benchmark}/run_benchmark.py (70%) diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb index b8d0083..0d33916 100644 --- a/benchmark/benchmark.ipynb +++ b/benchmark/benchmark.ipynb @@ -77,7 +77,7 @@ "metadata": {}, "outputs": [], "source": [ - "!git clone --branch 5-add-benchmark --single-branch https://github.com/mozilla-ai/structured-qa" + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" ] }, { @@ -86,7 +86,14 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install -e structured-qa" + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Find Retrieve Answer" ] }, { @@ -95,14 +102,21 @@ "metadata": {}, "outputs": [], "source": [ - "!GEMINI_API_KEY=\"\" python structured-qa/benchmark/run_benchmark.py --input_data structured-qa/benchmark/structured_qa.csv --output_file results.csv --model \"gemini\"" + "from structured_qa.benchmark.run_benchmark import run_benchmark\n", + "from structured_qa.benchmark.find_retrieve_answer import fra_process_document" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "# Results" + "run_benchmark(\n", + " input_data=\"structured_qa.csv\",\n", + " output_file=\"fra_results.csv\",\n", + " process_document=fra_process_document,\n", + ")" ] }, { diff --git a/src/structured_qa/benchmark/__init__.py b/src/structured_qa/benchmark/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmark/workflow.py b/src/structured_qa/benchmark/find_retrieve_answer.py similarity index 94% rename from benchmark/workflow.py rename to src/structured_qa/benchmark/find_retrieve_answer.py index 6ee6d05..ba2b606 100644 --- a/benchmark/workflow.py +++ 
b/src/structured_qa/benchmark/find_retrieve_answer.py @@ -1,3 +1,4 @@ +from pathlib import Path from loguru import logger from structured_qa.config import FIND_PROMPT @@ -26,7 +27,7 @@ """ -def workflow_process_document( +def fra_process_document( document_file, document_data, model_id: str = "bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf", @@ -37,7 +38,7 @@ def workflow_process_document( model = load_llama_cpp_model(model_id) logger.info("Splitting document into sections") - sections_dir = "sections" + sections_dir = Path("sections") / Path(document_file).stem document_to_sections_dir(document_file, sections_dir) logger.info("Predicting") diff --git a/benchmark/gemini.py b/src/structured_qa/benchmark/gemini.py similarity index 100% rename from benchmark/gemini.py rename to src/structured_qa/benchmark/gemini.py diff --git a/benchmark/run_benchmark.py b/src/structured_qa/benchmark/run_benchmark.py similarity index 70% rename from benchmark/run_benchmark.py rename to src/structured_qa/benchmark/run_benchmark.py index 0013167..c8d4d25 100644 --- a/benchmark/run_benchmark.py +++ b/src/structured_qa/benchmark/run_benchmark.py @@ -1,15 +1,12 @@ from pathlib import Path from urllib.request import urlretrieve +from typing import Callable import pandas as pd from fire import Fire from loguru import logger -from gemini import gemini_process_document -from workflow import workflow_process_document - - def download_document(url, output_file): if not Path(output_file).exists(): urlretrieve(url, output_file) @@ -19,7 +16,9 @@ def download_document(url, output_file): @logger.catch(reraise=True) -def run_benchmark(input_data: str, output_file: str, model: str): +def run_benchmark( + input_data: str, output_file: str, process_document: Callable, **kwargs +): logger.info("Loading input data") data = pd.read_csv(input_data) data["pred_answer"] = [None] * len(data) @@ -30,14 +29,9 @@ def run_benchmark(input_data: str, output_file: str, model: str): 
downloaded_document = Path(f"{Path(document_link).name}.pdf") download_document(document_link, downloaded_document) - if model == "gemini": - answers, sections = gemini_process_document( - downloaded_document, document_data - ) - elif model == "workflow": - answers, sections = workflow_process_document( - downloaded_document, document_data - ) + answers, sections = process_document( + downloaded_document, document_data, **kwargs + ) for index in document_data.index: data.loc[index, "pred_answer"] = str(answers[index]).upper() From d7e99e75694c733341d756ac8c1b2dd9b214d7d1 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 21 Jan 2025 19:05:52 +0100 Subject: [PATCH 033/120] Add fallback for out of context --- src/structured_qa/workflow.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 1e54fed..9266ef0 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -70,8 +70,12 @@ def find_retrieve_answer( {"role": "user", "content": question}, ] - result = model.create_chat_completion(messages) - result = result["choices"][0]["message"]["content"] + try: + result = model.create_chat_completion(messages) + result = result["choices"][0]["message"]["content"] + except ValueError: + logger.error("Failed to generate completion") + return None, sections_checked logger.debug(f"Result: {result}") From 0f381bbb397eaa771bf3e6d384b65da8bfd4461b Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 21 Jan 2025 19:22:47 +0100 Subject: [PATCH 034/120] Update with debugging info --- src/structured_qa/benchmark/find_retrieve_answer.py | 2 +- src/structured_qa/workflow.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/structured_qa/benchmark/find_retrieve_answer.py b/src/structured_qa/benchmark/find_retrieve_answer.py index ba2b606..54b1b3b 100644 --- a/src/structured_qa/benchmark/find_retrieve_answer.py +++ 
b/src/structured_qa/benchmark/find_retrieve_answer.py @@ -48,7 +48,7 @@ def fra_process_document( question = row["question"] try: float(row["answer"]) - answer_type = "a single number" + answer_type = "a number" except ValueError: if row["answer"] in ("YES", "NO"): answer_type = "YES or NO" diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 9266ef0..ce0249c 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -75,7 +75,7 @@ def find_retrieve_answer( result = result["choices"][0]["message"]["content"] except ValueError: logger.error("Failed to generate completion") - return None, sections_checked + return "Generation Error", sections_checked logger.debug(f"Result: {result}") @@ -89,7 +89,7 @@ def find_retrieve_answer( sections_checked.append(result) else: logger.error(f"Unknown section: {result}") - return None, sections_checked + return "Unknown section", sections_checked else: if result == "I need more info.": current_info = None From a0391a4e06079db0760dc058388f4245c50d5c98 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 21 Jan 2025 19:25:09 +0100 Subject: [PATCH 035/120] Update --- benchmark/benchmark.ipynb | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb index 0d33916..a7b0a5f 100644 --- a/benchmark/benchmark.ipynb +++ b/benchmark/benchmark.ipynb @@ -96,6 +96,16 @@ "# Find Retrieve Answer" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, { "cell_type": "code", "execution_count": null, @@ -116,6 +126,7 @@ " input_data=\"structured_qa.csv\",\n", " output_file=\"fra_results.csv\",\n", " process_document=fra_process_document,\n", + " model_id=\"bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q8_0.gguf\"\n", ")" ] }, From c3182cbec1ca8d07fc0dfc639aa4fd23d48d136d Mon Sep 17 00:00:00 2001 
From: daavoo Date: Wed, 22 Jan 2025 11:24:54 +0100 Subject: [PATCH 036/120] Update with mit-1 --- benchmark/structured_qa.csv | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 212b682..99a033b 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -70,3 +70,8 @@ https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rule https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,BUILD A LOCATION,How many victory points are granted by a built Field Location card that work as an upgrade?,1 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,ACTIONS,Can you use the raid action without a Raze token?,NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,GAME END,Can the game end in a tie?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,GAME END,Can the game end in a tie?,YES +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,4.4 Toilets,"Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold",B +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,CARBON MONOXIDE DETECTION AND VENTING,"In which type of parkings must a carbon monoxide detector be installed? 
-A: indoor -B: underground -C: indoor or underground",C +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,Natural lighting,"What is the daylight factor required for façades with exterior obstructions?",0.7 +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,Internal partitions and doors,"What fire resitance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90",A From 20b1651287bfc2af0029e0d860f52efcf983ecb0 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 13:50:21 +0100 Subject: [PATCH 037/120] test unsloth --- benchmark/benchmark.ipynb | 11 +++++- benchmark/structured_qa.csv | 2 +- pyproject.toml | 1 + .../benchmark/find_retrieve_answer.py | 6 +--- src/structured_qa/model_loaders.py | 36 +++++++++++++++++++ 5 files changed, 49 insertions(+), 7 deletions(-) diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb index a7b0a5f..244b765 100644 --- a/benchmark/benchmark.ipynb +++ b/benchmark/benchmark.ipynb @@ -103,6 +103,7 @@ "outputs": [], "source": [ "import os\n", + "\n", "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" ] }, @@ -112,10 +113,18 @@ "metadata": {}, "outputs": [], "source": [ + "from structured_qa.model_loaders import load_unsloth_model\n", "from structured_qa.benchmark.run_benchmark import run_benchmark\n", "from structured_qa.benchmark.find_retrieve_answer import fra_process_document" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -126,7 +135,7 @@ " input_data=\"structured_qa.csv\",\n", " output_file=\"fra_results.csv\",\n", " process_document=fra_process_document,\n", - " model_id=\"bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q8_0.gguf\"\n", + " 
model=load_unsloth_model(\"unsloth/Meta-Llama-3.1-8B-Instruct\", \"llama-3.1\"),\n", ")" ] }, diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 99a033b..383baf9 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -74,4 +74,4 @@ https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rule https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,4.4 Toilets,"Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold",B https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,CARBON MONOXIDE DETECTION AND VENTING,"In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground",C https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,Natural lighting,"What is the daylight factor required for façades with exterior obstructions?",0.7 -https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,Internal partitions and doors,"What fire resitance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90",A +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,Internal partitions and doors,"What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90",A diff --git a/pyproject.toml b/pyproject.toml index 75d203c..ebdabbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "pymupdf4llm", "pyyaml", "streamlit", + "unsloth", ] [project.optional-dependencies] diff --git a/src/structured_qa/benchmark/find_retrieve_answer.py b/src/structured_qa/benchmark/find_retrieve_answer.py index 54b1b3b..c852971 100644 --- a/src/structured_qa/benchmark/find_retrieve_answer.py +++ b/src/structured_qa/benchmark/find_retrieve_answer.py @@ -2,7 +2,6 @@ from loguru import logger from structured_qa.config import FIND_PROMPT -from structured_qa.model_loaders import load_llama_cpp_model from structured_qa.preprocessing import document_to_sections_dir from structured_qa.workflow import find_retrieve_answer @@ -30,13 +29,10 @@ def fra_process_document( document_file, document_data, - model_id: str = "bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf", + model, find_prompt: str = FIND_PROMPT, answer_prompt: str = ANSWER_WITH_TYPE_PROMPT, ): - logger.info("Creating model") - model = load_llama_cpp_model(model_id) - logger.info("Splitting document into sections") sections_dir = Path("sections") / Path(document_file).stem document_to_sections_dir(document_file, sections_dir) diff --git a/src/structured_qa/model_loaders.py b/src/structured_qa/model_loaders.py index 8a6d587..0340a98 100644 --- a/src/structured_qa/model_loaders.py +++ b/src/structured_qa/model_loaders.py @@ -1,5 +1,8 @@ import subprocess + from llama_cpp import Llama +from unsloth import FastLanguageModel +from unsloth.chat_templates import get_chat_template def gpu_available(): @@ -33,3 +36,36 @@ def load_llama_cpp_model(model_id: str) -> Llama: n_gpu_layers=-1 if gpu_available() else 0, ) return model + + +class UnslothModel: + def __init__(self, model, tokenizer): + self.model = model + self.tokenizer = tokenizer + + def create_chat_completion(self, messages): + inputs = 
self.tokenizer.apply_chat_template( + messages, + tokenize=True, + add_generation_prompt=True, + return_tensors="pt", + ).to("cuda") + outputs = self.model.generate(input_ids=inputs) + response = self.tokenizer.batch_decode(outputs[:, len(inputs[0]) :])[0] + return response + + +def load_unsloth_model( + model_id: str, chat_template: str, load_in_4bit: bool = True, **kwargs +) -> UnslothModel: + model, tokenizer = FastLanguageModel.from_pretrained( + model_name=model_id, + load_in_4bit=load_in_4bit, + **kwargs, + ) + tokenizer = get_chat_template( + tokenizer, + chat_template=chat_template, + ) + FastLanguageModel.for_inference(model) + return UnslothModel(model=model, tokenizer=tokenizer) From 0dd98da674fc060be1df62f7014bc60041edffb0 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 13:53:34 +0100 Subject: [PATCH 038/120] Add , skip_special_tokens = True --- src/structured_qa/model_loaders.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/structured_qa/model_loaders.py b/src/structured_qa/model_loaders.py index 0340a98..7bfeb4c 100644 --- a/src/structured_qa/model_loaders.py +++ b/src/structured_qa/model_loaders.py @@ -51,7 +51,9 @@ def create_chat_completion(self, messages): return_tensors="pt", ).to("cuda") outputs = self.model.generate(input_ids=inputs) - response = self.tokenizer.batch_decode(outputs[:, len(inputs[0]) :])[0] + response = self.tokenizer.batch_decode( + outputs[:, len(inputs[0]) :], skip_special_tokens=True + )[0] return response From 6ac29aa388643e5d9e3914cef69eae2ae3b2ab50 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 13:58:51 +0100 Subject: [PATCH 039/120] Update --- src/structured_qa/model_loaders.py | 24 +++++++++++++++++------- src/structured_qa/workflow.py | 23 +++++++++++------------ 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/src/structured_qa/model_loaders.py b/src/structured_qa/model_loaders.py index 7bfeb4c..6ab4d2c 100644 --- a/src/structured_qa/model_loaders.py 
+++ b/src/structured_qa/model_loaders.py @@ -1,9 +1,5 @@ import subprocess -from llama_cpp import Llama -from unsloth import FastLanguageModel -from unsloth.chat_templates import get_chat_template - def gpu_available(): try: @@ -13,7 +9,16 @@ def gpu_available(): return False -def load_llama_cpp_model(model_id: str) -> Llama: +class LlamaModel: + def __init__(self, model): + self.model = model + + def get_response(self, messages): + result = self.model.create_chat_completion(messages) + return result["choices"][0]["message"]["content"] + + +def load_llama_cpp_model(model_id: str) -> LlamaModel: """ Loads the given model_id using Llama.from_pretrained. @@ -27,6 +32,8 @@ def load_llama_cpp_model(model_id: str) -> Llama: Returns: Llama: The loaded model. """ + from llama_cpp import Llama + org, repo, filename = model_id.split("/") model = Llama.from_pretrained( repo_id=f"{org}/{repo}", @@ -35,7 +42,7 @@ def load_llama_cpp_model(model_id: str) -> Llama: verbose=False, n_gpu_layers=-1 if gpu_available() else 0, ) - return model + return LlamaModel(model=model) class UnslothModel: @@ -43,7 +50,7 @@ def __init__(self, model, tokenizer): self.model = model self.tokenizer = tokenizer - def create_chat_completion(self, messages): + def get_response(self, messages): inputs = self.tokenizer.apply_chat_template( messages, tokenize=True, @@ -60,6 +67,9 @@ def create_chat_completion(self, messages): def load_unsloth_model( model_id: str, chat_template: str, load_in_4bit: bool = True, **kwargs ) -> UnslothModel: + from unsloth import FastLanguageModel + from unsloth.chat_templates import get_chat_template + model, tokenizer = FastLanguageModel.from_pretrained( model_name=model_id, load_in_4bit=load_in_4bit, diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index ce0249c..b2b4ced 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -71,29 +71,28 @@ def find_retrieve_answer( ] try: - result = 
model.create_chat_completion(messages) - result = result["choices"][0]["message"]["content"] + response = model.get_response(messages) except ValueError: logger.error("Failed to generate completion") return "Generation Error", sections_checked - logger.debug(f"Result: {result}") + logger.debug(f"Result: {response}") if finding_section: - result = result.strip() - logger.info(f"Retrieving section: {result}") - if result in sections_names: - section_content = (sections_dir / f"{result}.txt").read_text() - current_section = result + response = response.strip() + logger.info(f"Retrieving section: {response}") + if response in sections_names: + section_content = (sections_dir / f"{response}.txt").read_text() + current_section = response current_info = section_content - sections_checked.append(result) + sections_checked.append(response) else: - logger.error(f"Unknown section: {result}") + logger.error(f"Unknown section: {response}") return "Unknown section", sections_checked else: - if result == "I need more info.": + if response == "I need more info.": current_info = None sections_names.remove(current_section) continue else: - return result, sections_checked + return response, sections_checked From 95b3d57a0e180aa457b2c75659482f87b9ce5de2 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 14:09:42 +0100 Subject: [PATCH 040/120] Updates --- src/structured_qa/benchmark/find_retrieve_answer.py | 5 +++-- src/structured_qa/config.py | 9 +++++---- src/structured_qa/workflow.py | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/structured_qa/benchmark/find_retrieve_answer.py b/src/structured_qa/benchmark/find_retrieve_answer.py index c852971..075b149 100644 --- a/src/structured_qa/benchmark/find_retrieve_answer.py +++ b/src/structured_qa/benchmark/find_retrieve_answer.py @@ -33,9 +33,10 @@ def fra_process_document( find_prompt: str = FIND_PROMPT, answer_prompt: str = ANSWER_WITH_TYPE_PROMPT, ): - logger.info("Splitting document into sections") 
sections_dir = Path("sections") / Path(document_file).stem - document_to_sections_dir(document_file, sections_dir) + if not sections_dir.exists(): + logger.info("Splitting document into sections") + document_to_sections_dir(document_file, sections_dir) logger.info("Predicting") answers = {} diff --git a/src/structured_qa/config.py b/src/structured_qa/config.py index f1d4069..e160b67 100644 --- a/src/structured_qa/config.py +++ b/src/structured_qa/config.py @@ -6,22 +6,23 @@ FIND_PROMPT = """ You are given two pieces of information: -1. A user question. -2. A list of valid section names. +1. A list of valid section names. +2. A user question. Your task is to: - Identify exactly one `section_name` from the provided list that seems related to the user question. - Return the `section_name` exactly as it appears in the list. +- Do NOT answer the question. - Do NOT return any additional text, explanation, or formatting. - Do NOT combine multiple section names into a single response. -Here is the list of valid `section_names`: +Here is the list of valid section names: ``` {SECTIONS} ``` -Now, based on the input question, return the single most relevant `section_name` from the list. +Now, based on the following question, return the single most relevant `section_name` from the list. 
""" ANSWER_PROMPT = """ diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index b2b4ced..0c802cb 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -72,7 +72,7 @@ def find_retrieve_answer( try: response = model.get_response(messages) - except ValueError: + except Exception: logger.error("Failed to generate completion") return "Generation Error", sections_checked From d946f81c716089dc6a2b4dabdd6416b4441533e5 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 16:16:45 +0100 Subject: [PATCH 041/120] Add full_context --- src/structured_qa/benchmark/full_context.py | 58 +++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 src/structured_qa/benchmark/full_context.py diff --git a/src/structured_qa/benchmark/full_context.py b/src/structured_qa/benchmark/full_context.py new file mode 100644 index 0000000..8c47ee0 --- /dev/null +++ b/src/structured_qa/benchmark/full_context.py @@ -0,0 +1,58 @@ +from pathlib import Path +from loguru import logger + + +ANSWER_WITH_TYPE_PROMPT = """ +You are a rigorous assistant answering questions. +You only answer based on the current information available. +You should only answer with ANSWER_TYPE. 
+ +The current information available is: + +``` +{CURRENT_INFO} +``` +""" + + +def fra_process_document( + document_file, + document_data, + model, + answer_prompt: str = ANSWER_WITH_TYPE_PROMPT, +): + document = Path(document_file).read_text().strip() + + logger.info("Predicting") + answers = {} + sections = {} + for index, row in document_data.iterrows(): + question = row["question"] + try: + float(row["answer"]) + answer_type = "a number" + except ValueError: + if row["answer"] in ("YES", "NO"): + answer_type = "YES or NO" + else: + answer_type = "a single letter" + + answer_prompt = answer_prompt.replace("ANSWER_TYPE", answer_type) + + logger.info(f"Question: {question}") + messages = [ + { + "role": "system", + "content": answer_prompt.format(CURRENT_INFO="\n".join(document)), + }, + {"role": "user", "content": question}, + ] + try: + response = model.get_response(messages) + except Exception: + logger.error("Failed to generate completion") + return "Generation Error", [] + answers[index] = response + sections[index] = None + + return answers, sections From 4ea1f7dd6083483c0a9d1f6e9afae142ac188acf Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 16:28:19 +0100 Subject: [PATCH 042/120] Update full context --- src/structured_qa/benchmark/full_context.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/structured_qa/benchmark/full_context.py b/src/structured_qa/benchmark/full_context.py index 8c47ee0..32aeb3a 100644 --- a/src/structured_qa/benchmark/full_context.py +++ b/src/structured_qa/benchmark/full_context.py @@ -1,4 +1,4 @@ -from pathlib import Path +import pymupdf4llm from loguru import logger @@ -15,13 +15,13 @@ """ -def fra_process_document( +def full_context_process_document( document_file, document_data, model, answer_prompt: str = ANSWER_WITH_TYPE_PROMPT, ): - document = Path(document_file).read_text().strip() + md_text = pymupdf4llm.to_markdown(document_file) logger.info("Predicting") answers = {} @@ -43,7 
+43,7 @@ def fra_process_document( messages = [ { "role": "system", - "content": answer_prompt.format(CURRENT_INFO="\n".join(document)), + "content": answer_prompt.format(CURRENT_INFO="\n".join(md_text)), }, {"role": "user", "content": question}, ] From a4888f2f7dea54ff1ceab99b20146c68dcd951a2 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 16:30:36 +0100 Subject: [PATCH 043/120] update --- src/structured_qa/benchmark/full_context.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/structured_qa/benchmark/full_context.py b/src/structured_qa/benchmark/full_context.py index 32aeb3a..0b6a160 100644 --- a/src/structured_qa/benchmark/full_context.py +++ b/src/structured_qa/benchmark/full_context.py @@ -49,9 +49,10 @@ def full_context_process_document( ] try: response = model.get_response(messages) - except Exception: + except Exception as e: + logger.exception(e) logger.error("Failed to generate completion") - return "Generation Error", [] + return "Generation Error", [None] answers[index] = response sections[index] = None From e0f3a820fa891895fcea38cbe3d6a7016b21e5b7 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 16:36:26 +0100 Subject: [PATCH 044/120] Add load and clean --- src/structured_qa/benchmark/full_context.py | 29 ++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/structured_qa/benchmark/full_context.py b/src/structured_qa/benchmark/full_context.py index 0b6a160..dbf424a 100644 --- a/src/structured_qa/benchmark/full_context.py +++ b/src/structured_qa/benchmark/full_context.py @@ -1,4 +1,6 @@ -import pymupdf4llm +import re + +import PyPDF2 from loguru import logger @@ -15,13 +17,34 @@ """ +def load_pdf(pdf_file: str) -> str | None: + try: + pdf_reader = PyPDF2.PdfReader(pdf_file) + return "\n".join(page.extract_text() for page in pdf_reader.pages) + except Exception as e: + logger.exception(e) + return None + + +def clean_with_regex(text: str) -> str: + text = re.sub( + 
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", + "", + text, + ) + text = re.sub(r"[\w\.-]+@[\w\.-]+\.[\w]+", "", text) + text = re.sub(r'[^a-zA-Z0-9\s.,!?;:"\']', "", text) + text = re.sub(r"\s+", " ", text).strip() + return text + + def full_context_process_document( document_file, document_data, model, answer_prompt: str = ANSWER_WITH_TYPE_PROMPT, ): - md_text = pymupdf4llm.to_markdown(document_file) + document = clean_with_regex(load_pdf(document_file)) logger.info("Predicting") answers = {} @@ -43,7 +66,7 @@ def full_context_process_document( messages = [ { "role": "system", - "content": answer_prompt.format(CURRENT_INFO="\n".join(md_text)), + "content": answer_prompt.format(CURRENT_INFO="\n".join(document)), }, {"role": "user", "content": question}, ] From 906c8d9ea79e66e5b3ac7a94e9bc21b91926ea66 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 16:47:26 +0100 Subject: [PATCH 045/120] Update --- src/structured_qa/benchmark/full_context.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/structured_qa/benchmark/full_context.py b/src/structured_qa/benchmark/full_context.py index dbf424a..5152561 100644 --- a/src/structured_qa/benchmark/full_context.py +++ b/src/structured_qa/benchmark/full_context.py @@ -46,6 +46,14 @@ def full_context_process_document( ): document = clean_with_regex(load_pdf(document_file)) + max_characters = model.n_ctx() * 4 + if len(document) > max_characters: + logger.warning( + f"Input text is too big ({len(document)})." + f" Using only a subset of it ({max_characters})." 
+ ) + document = document[:max_characters] + logger.info("Predicting") answers = {} sections = {} @@ -75,7 +83,7 @@ def full_context_process_document( except Exception as e: logger.exception(e) logger.error("Failed to generate completion") - return "Generation Error", [None] + response = "Generation Error" answers[index] = response sections[index] = None From bb2afe5316c6e610f09b5feb49426b28300469a7 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 16:49:04 +0100 Subject: [PATCH 046/120] Update --- src/structured_qa/benchmark/full_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/structured_qa/benchmark/full_context.py b/src/structured_qa/benchmark/full_context.py index 5152561..e385822 100644 --- a/src/structured_qa/benchmark/full_context.py +++ b/src/structured_qa/benchmark/full_context.py @@ -46,7 +46,7 @@ def full_context_process_document( ): document = clean_with_regex(load_pdf(document_file)) - max_characters = model.n_ctx() * 4 + max_characters = model.model.n_ctx() * 4 if len(document) > max_characters: logger.warning( f"Input text is too big ({len(document)})." 
From 51c31f7b7806251f09664a3366928738201a92e9 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 16:52:30 +0100 Subject: [PATCH 047/120] print --- src/structured_qa/benchmark/full_context.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/structured_qa/benchmark/full_context.py b/src/structured_qa/benchmark/full_context.py index e385822..0347091 100644 --- a/src/structured_qa/benchmark/full_context.py +++ b/src/structured_qa/benchmark/full_context.py @@ -46,6 +46,7 @@ def full_context_process_document( ): document = clean_with_regex(load_pdf(document_file)) + logger.info(f"Length of the document: {len(document)}") max_characters = model.model.n_ctx() * 4 if len(document) > max_characters: logger.warning( From c5e0ac4bfa312beab3d5c87bbe1b1b01ca9b1a4e Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 16:53:18 +0100 Subject: [PATCH 048/120] Update --- src/structured_qa/benchmark/full_context.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/structured_qa/benchmark/full_context.py b/src/structured_qa/benchmark/full_context.py index 0347091..b3bcc2b 100644 --- a/src/structured_qa/benchmark/full_context.py +++ b/src/structured_qa/benchmark/full_context.py @@ -82,8 +82,7 @@ def full_context_process_document( try: response = model.get_response(messages) except Exception as e: - logger.exception(e) - logger.error("Failed to generate completion") + logger.error(f"Failed to generate completion: {e}") response = "Generation Error" answers[index] = response sections[index] = None From cc10a9d2ecd156547464306d0586f4e534e3dad8 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 18:58:17 +0100 Subject: [PATCH 049/120] Add load_gemini_model --- src/structured_qa/benchmark/gemini.py | 48 +++++++++------------------ src/structured_qa/model_loaders.py | 33 ++++++++++++++++++ 2 files changed, 49 insertions(+), 32 deletions(-) diff --git a/src/structured_qa/benchmark/gemini.py b/src/structured_qa/benchmark/gemini.py index 
70dc927..f20f6e2 100644 --- a/src/structured_qa/benchmark/gemini.py +++ b/src/structured_qa/benchmark/gemini.py @@ -5,7 +5,8 @@ import google.generativeai as genai from loguru import logger -SYSTEM_PROMPT = """ + +FULL_CONTEXT_PROMPT = """ You are given an input document and a question. You can only answer the question based on the information in the document. You will return a JSON name with two keys: "section" and "answer". @@ -35,11 +36,10 @@ """ -def gemini_process_document( +def gemini_full_context_process_document( document_file, document_data, - model_name: str = "gemini-2.0-flash-exp", - system_prompt: str = SYSTEM_PROMPT, + model, ): genai.configure(api_key=os.environ["GEMINI_API_KEY"]) @@ -50,19 +50,6 @@ def gemini_process_document( time.sleep(2) file = genai.get_file(file.name) - logger.info("Creating model") - model = genai.GenerativeModel( - model_name=model_name, - generation_config={ - "temperature": 1, - "top_p": 0.95, - "top_k": 40, - "max_output_tokens": 8192, - "response_mime_type": "application/json", - }, - system_instruction=system_prompt, - ) - logger.info("Predicting") n = 0 answers = {} @@ -73,21 +60,18 @@ def gemini_process_document( time.sleep(60) question = row["question"] logger.debug(f"Question: {question}") - chat_session = model.start_chat( - history=[ - { - "role": "user", - "parts": [ - file, - question, - ], - } - ] - ) - - response = chat_session.send_message("INSERT_INPUT_HERE") - logger.debug(response.text) - response_json = json.loads(response.text) + messages = [ + { + "role": "user", + "parts": [ + file, + question, + ], + } + ] + response = model.get_response(messages) + logger.debug(response) + response_json = json.loads(response) answers[index] = response_json["answer"] sections[index] = response_json["section"] n += 1 diff --git a/src/structured_qa/model_loaders.py b/src/structured_qa/model_loaders.py index 6ab4d2c..4ebe786 100644 --- a/src/structured_qa/model_loaders.py +++ b/src/structured_qa/model_loaders.py @@ -81,3 
+81,36 @@ def load_unsloth_model( ) FastLanguageModel.for_inference(model) return UnslothModel(model=model, tokenizer=tokenizer) + + +class GeminiModel: + def __init__(self, model): + self.model = model + + def get_response(self, messages): + messages = [] + for message in messages: + messages.append( + { + "role": "user", + "parts": [ + message["content"] + if "content" in message + else message["parts"], + ], + } + ) + chat_session = self.model.start_chat(history=messages) + response = chat_session.send_message("INSERT_INPUT_HERE") + return response.text + + +def load_gemini_model(model_id: str, system_prompt: str, **kwargs) -> GeminiModel: + import google.generativeai as genai + + model = genai.GenerativeModel( + model_name=model_id, + system_instruction=system_prompt, + **kwargs, + ) + return GeminiModel(model=model) From 1560c717e1bae4236dd20bd7da5d4ed8ce3388ac Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 19:00:29 +0100 Subject: [PATCH 050/120] Add sleep --- src/structured_qa/model_loaders.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/structured_qa/model_loaders.py b/src/structured_qa/model_loaders.py index 4ebe786..9e6e290 100644 --- a/src/structured_qa/model_loaders.py +++ b/src/structured_qa/model_loaders.py @@ -1,4 +1,8 @@ import subprocess +import time + + +from loguru import logger def gpu_available(): @@ -86,6 +90,7 @@ def load_unsloth_model( class GeminiModel: def __init__(self, model): self.model = model + self.current_calls = 0 def get_response(self, messages): messages = [] @@ -100,8 +105,13 @@ def get_response(self, messages): ], } ) + if self.current_calls >= 10: + logger.info("Waiting for 60 seconds") + time.sleep(60) + self.current_calls = 0 chat_session = self.model.start_chat(history=messages) response = chat_session.send_message("INSERT_INPUT_HERE") + self.current_calls += 1 return response.text From 94e7580d85e02b0a59d8f8f7a7576dd1fb316652 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 
19:13:01 +0100 Subject: [PATCH 051/120] Update get_response --- src/structured_qa/model_loaders.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/structured_qa/model_loaders.py b/src/structured_qa/model_loaders.py index 9e6e290..2b071a1 100644 --- a/src/structured_qa/model_loaders.py +++ b/src/structured_qa/model_loaders.py @@ -94,23 +94,14 @@ def __init__(self, model): def get_response(self, messages): messages = [] - for message in messages: - messages.append( - { - "role": "user", - "parts": [ - message["content"] - if "content" in message - else message["parts"], - ], - } - ) + stacked_message = "\n".join( + message["content"] for message in messages + ) if self.current_calls >= 10: logger.info("Waiting for 60 seconds") time.sleep(60) self.current_calls = 0 - chat_session = self.model.start_chat(history=messages) - response = chat_session.send_message("INSERT_INPUT_HERE") + response = self.model.generate_content(stacked_message) self.current_calls += 1 return response.text From e7b5d5bca9fc8112135740515b8eaa7c7ab956f3 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 19:13:17 +0100 Subject: [PATCH 052/120] Update --- src/structured_qa/model_loaders.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/structured_qa/model_loaders.py b/src/structured_qa/model_loaders.py index 2b071a1..88ca7a0 100644 --- a/src/structured_qa/model_loaders.py +++ b/src/structured_qa/model_loaders.py @@ -94,9 +94,7 @@ def __init__(self, model): def get_response(self, messages): messages = [] - stacked_message = "\n".join( - message["content"] for message in messages - ) + stacked_message = "\n".join(message["content"] for message in messages) if self.current_calls >= 10: logger.info("Waiting for 60 seconds") time.sleep(60) From 5f6443b8f026aeb02102874cdcbea60c42198cb6 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 19:15:00 +0100 Subject: [PATCH 053/120] Log error --- src/structured_qa/workflow.py 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 0c802cb..ec22681 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -72,8 +72,8 @@ def find_retrieve_answer( try: response = model.get_response(messages) - except Exception: - logger.error("Failed to generate completion") + except Exception as e: + logger.error(f"Failed to generate completion: {e}") return "Generation Error", sections_checked logger.debug(f"Result: {response}") From 819c6b2e6e39a7bfdc5ae2138e6ec908f7b8adfc Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 22 Jan 2025 19:17:00 +0100 Subject: [PATCH 054/120] fix --- src/structured_qa/model_loaders.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/structured_qa/model_loaders.py b/src/structured_qa/model_loaders.py index 88ca7a0..1a62837 100644 --- a/src/structured_qa/model_loaders.py +++ b/src/structured_qa/model_loaders.py @@ -93,7 +93,6 @@ def __init__(self, model): self.current_calls = 0 def get_response(self, messages): - messages = [] stacked_message = "\n".join(message["content"] for message in messages) if self.current_calls >= 10: logger.info("Waiting for 60 seconds") From 5625c39895944de231cfe8a6ece776cc1f7a7700 Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 23 Jan 2025 10:36:30 +0100 Subject: [PATCH 055/120] Make the more info check more flexible --- src/structured_qa/workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index ec22681..8de335b 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -90,7 +90,7 @@ def find_retrieve_answer( logger.error(f"Unknown section: {response}") return "Unknown section", sections_checked else: - if response == "I need more info.": + if "MORE INFO" in response.upper(): current_info = None sections_names.remove(current_section) continue From 
d125b79bb7bfdeab751f93bac37039950fe24ce5 Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 23 Jan 2025 10:53:36 +0100 Subject: [PATCH 056/120] Add gemini_full_context notebook --- benchmark/gemini_full_context.ipynb | 296 ++++++++++++++++++++++++++ src/structured_qa/benchmark/gemini.py | 78 ------- 2 files changed, 296 insertions(+), 78 deletions(-) create mode 100644 benchmark/gemini_full_context.ipynb diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb new file mode 100644 index 0000000..a3ec236 --- /dev/null +++ b/benchmark/gemini_full_context.ipynb @@ -0,0 +1,296 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", + "genai.configure(api_key=\"GEMINI_API_KEY\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from 
structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Function to Process a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import time\n", + "\n", + "from loguru import logger\n", + "\n", + "\n", + "def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + "):\n", + " logger.info(\"Uploading file\")\n", + " file = genai.upload_file(document_file, mime_type=\"application/pdf\")\n", + " while file.state.name == \"PROCESSING\":\n", + " logger.debug(\"Waiting for file to be processed.\")\n", + " time.sleep(2)\n", + " file = genai.get_file(file.name)\n", + "\n", + " logger.info(\"Predicting\")\n", + " n = 0\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " if n > 0 and n % 9 == 0:\n", + " logger.info(\"Waiting for 60 seconds\")\n", + " time.sleep(60)\n", + " question = row[\"question\"]\n", + " logger.debug(f\"Question: {question}\")\n", + " messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"parts\": [\n", + " file,\n", + " question,\n", + " ],\n", + " }\n", + " ]\n", + " response = model.get_response(messages)\n", + " logger.debug(response)\n", + " response_json = json.loads(response)\n", + " answers[index] = response_json[\"answer\"]\n", + " sections[index] = response_json[\"section\"]\n", + " n += 1\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Function to Download Document" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "\n", + "def download_document(url, output_file):\n", + " if not Path(output_file).exists():\n", + " urlretrieve(url, output_file)\n", + " 
logger.debug(f\"Downloaded {url} to {output_file}\")\n", + " else:\n", + " logger.debug(f\"File {output_file} already exists\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FULL_CONTEXT_PROMPT = \"\"\"\n", + "You are given an input document and a question.\n", + "You can only answer the question based on the information in the document.\n", + "You will return a JSON name with two keys: \"section\" and \"answer\".\n", + "In `\"section\"`, you will return the name of the section where you found the answer.\n", + "In `\"answer\"`, you will return the answer one of the following JSON:\n", + "- Yes/No (for boolean questions)\n", + "Is the model an LLM?\n", + "{\n", + " \"section\": \"1. Introduction\",\n", + " \"answer\": \"No\"\n", + "}\n", + "- Single number (for numeric questions)\n", + "How many layers does the model have?\n", + "{\n", + " \"section\": \"2. Architecture\",\n", + " \"answer\": 12\n", + "}\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model?\n", + "-A: ReLU\n", + "-B: Sigmoid\n", + "-C: Tanh\n", + "{\n", + " \"section\": \"2. 
Architecture\",\n", + " \"answer\": \"C\"\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = load_gemini_model(\n", + " \"gemini-2.0-flash-exp\",\n", + " system_prompt=FULL_CONTEXT_PROMPT,\n", + " generation_config={\n", + " \"response_mime_type\": \"application/json\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", + " download_document(document_link, downloaded_document)\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + } + ], + "metadata": { + "kernelspec": { + 
"display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/structured_qa/benchmark/gemini.py b/src/structured_qa/benchmark/gemini.py index f20f6e2..e69de29 100644 --- a/src/structured_qa/benchmark/gemini.py +++ b/src/structured_qa/benchmark/gemini.py @@ -1,78 +0,0 @@ -import json -import os -import time - -import google.generativeai as genai -from loguru import logger - - -FULL_CONTEXT_PROMPT = """ -You are given an input document and a question. -You can only answer the question based on the information in the document. -You will return a JSON name with two keys: "section" and "answer". -In `"section"`, you will return the name of the section where you found the answer. -In `"answer"`, you will return the answer one of the following JSON: -- Yes/No (for boolean questions) -Is the model an LLM? -{ - "section": "1. Introduction", - "answer": "No" -} -- Single number (for numeric questions) -How many layers does the model have? -{ - "section": "2. Architecture", - "answer": 12 -} -- Single letter (for multiple-choice questions) -What is the activation function used in the model? --A: ReLU --B: Sigmoid --C: Tanh -{ - "section": "2. 
Architecture", - "answer": "C" -} -""" - - -def gemini_full_context_process_document( - document_file, - document_data, - model, -): - genai.configure(api_key=os.environ["GEMINI_API_KEY"]) - - logger.info("Uploading file") - file = genai.upload_file(document_file, mime_type="application/pdf") - while file.state.name == "PROCESSING": - logger.debug("Waiting for file to be processed.") - time.sleep(2) - file = genai.get_file(file.name) - - logger.info("Predicting") - n = 0 - answers = {} - sections = {} - for index, row in document_data.iterrows(): - if n > 0 and n % 9 == 0: - logger.info("Waiting for 60 seconds") - time.sleep(60) - question = row["question"] - logger.debug(f"Question: {question}") - messages = [ - { - "role": "user", - "parts": [ - file, - question, - ], - } - ] - response = model.get_response(messages) - logger.debug(response) - response_json = json.loads(response) - answers[index] = response_json["answer"] - sections[index] = response_json["section"] - n += 1 - return answers, sections From 88a9357be58540c2a2bd16e1cae7540458db9161 Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 23 Jan 2025 11:10:02 +0100 Subject: [PATCH 057/120] typo --- benchmark/structured_qa.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 383baf9..29944d1 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -37,7 +37,7 @@ https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Allian https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 2 Benefits of Open Access,what is the term of office for members of the advisory board of the Authors Alliance? -A: The source does not specify a term of office for the advisory board. 
-B: 2 years -C: 4 years,A https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,Does open access eliminate price barriers?,YES https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chatper 1 What is this guide and who is it for,Are publication fees required for all open access journals?,NO -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 3 Open Access Policies,In what year did the Bull and Melinda Gates foundation implement an open access policy?,2015 +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 3 Open Access Policies,In what year did the Bill and Melinda Gates foundation implement an open access policy?,2015 https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,Are Gold Open Access and Green Open Access mutually exclusive.,NO https://arxiv.org/pdf/2201.11903,3 Arithmetic Reasoning,Is Arithmetic reasoning is a task that language models often find very easy?,NO https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many large language models were evaluated?,5 From d929a80fbfdbf5fb833294b2e324a8ce503eb72a Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 23 Jan 2025 11:10:10 +0100 Subject: [PATCH 058/120] Check por API KEY --- benchmark/gemini_full_context.ipynb | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index a3ec236..51ee666 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -62,8 +62,11 @@ "import os\n", "import google.generativeai as genai\n", "\n", + 
"GEMINI_API_KEY = None\n", + "if not GEMINI_API_KEY:\n", + " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", - "genai.configure(api_key=\"GEMINI_API_KEY\")" + "genai.configure(api_key=GEMINI_API_KEY)" ] }, { @@ -115,19 +118,10 @@ " logger.info(\"Waiting for 60 seconds\")\n", " time.sleep(60)\n", " question = row[\"question\"]\n", - " logger.debug(f\"Question: {question}\")\n", - " messages = [\n", - " {\n", - " \"role\": \"user\",\n", - " \"parts\": [\n", - " file,\n", - " question,\n", - " ],\n", - " }\n", - " ]\n", - " response = model.get_response(messages)\n", - " logger.debug(response)\n", - " response_json = json.loads(response)\n", + " logger.info(f\"Question: {question}\")\n", + " response = model.model.generate_content([file, question])\n", + " logger.info(response.text)\n", + " response_json = json.loads(response.text)\n", " answers[index] = response_json[\"answer\"]\n", " sections[index] = response_json[\"section\"]\n", " n += 1\n", From 9e718b3be98e38ab376429b374956ec2dda07cb9 Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 23 Jan 2025 11:44:16 +0100 Subject: [PATCH 059/120] Update with outputs --- benchmark/gemini_full_context.ipynb | 1684 ++++++++++++++++++++++----- 1 file changed, 1397 insertions(+), 287 deletions(-) diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index 51ee666..e26c5d0 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -1,290 +1,1400 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Structured Q&A" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oD2lVadPlyhR" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JZeb4ABvlyhS" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"id": "77aI1i7vlyhS" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "atlPXFshlyhS" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "QrgOGtuGlyhT", + "outputId": "62d28154-d186-4417-b032-6701fd174ecd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-e3shdxjv\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-e3shdxjv\n", + " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", + " Switched to a new branch '5-add-benchmark'\n", + " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit d125b79bb7bfdeab751f93bac37039950fe24ce5\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting fire (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading fire-0.7.0.tar.gz (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (0.27.1)\n", + "Collecting llama-cpp-python (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading llama_cpp_python-0.3.6.tar.gz (66.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting loguru (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (2.10.5)\n", + "Collecting pymupdf4llm (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (6.0.2)\n", + "Collecting streamlit (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", + "Collecting unsloth (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading unsloth-2025.1.6-py3-none-any.whl.metadata (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.7/53.7 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev56+gd125b79) (2.5.0)\n", + "Requirement already satisfied: 
filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.12.2)\n", + "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (1.26.4)\n", + "Collecting diskcache>=5.6.1 (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.1.5)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", + "Requirement 
already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (8.1.8)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.25.5)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (6.3.3)\n", + "Collecting unsloth_zoo>=2025.1.4 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading unsloth_zoo-2025.1.5-py3-none-any.whl.metadata (16 kB)\n", + "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.5.1+cu121)\n", + "Collecting xformers>=0.0.27.post2 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n", + "Collecting bitsandbytes (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)\n", + "Requirement already satisfied: triton>=3.0.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.1.0)\n", + "Collecting tyro (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading tyro-0.9.12-py3-none-any.whl.metadata (9.4 kB)\n", + "Requirement already satisfied: transformers!=4.47.0,>=4.46.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.47.1)\n", + "Collecting datasets>=2.16.0 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: sentencepiece>=0.2.0 in 
/usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.0)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (5.9.5)\n", + "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.45.1)\n", + "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.2.1)\n", + "Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading trl-0.13.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.14.0)\n", + "Collecting protobuf<6,>=3.20 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)\n", + "Collecting hf_transfer (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from accelerate>=0.34.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.5.2)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.21.1)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", + "Collecting xxhash 
(from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", + "Collecting multiprocess<0.70.17 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", + "Collecting fsspec>=2023.5.0 (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.11.11)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.0.12)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2>=2.11.3->llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.0.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.10)\n", + "Requirement already 
satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.18.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.4.2)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) 
(11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.13.1)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.6.85)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2024.11.6)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.21.0)\n", + "Collecting cut_cross_entropy (from 
unsloth_zoo>=2025.1.4->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)\n", + "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.16)\n", + "Collecting shtab>=1.5.6 (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n", + "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.4.1)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.18.3)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 
in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.0.2)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.17.0)\n", + "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m288.3 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", + "Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m84.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading unsloth-2025.1.6-py3-none-any.whl (175 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m175.5/175.5 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 
kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.1/162.1 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m89.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m79.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading trl-0.13.0-py3-none-any.whl (293 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.4/293.4 kB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading unsloth_zoo-2025.1.5-py3-none-any.whl (80 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.2/80.2 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", + 
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl (15.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.3/15.3 MB\u001b[0m \u001b[31m86.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.1/69.1 MB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m83.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tyro-0.9.12-py3-none-any.whl (115 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.7/115.7 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\n", + "Downloading cut_cross_entropy-25.1.1-py3-none-any.whl (22 kB)\n", + "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire, llama-cpp-python\n", + " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev56+gd125b79-py3-none-any.whl size=16254 sha256=44ec8b803647a36e38d429c99dca9a41465cabf732b89ee30d835b5bf7ef397a\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-7rr3qwwf/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", + " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=ae95bde960b4f7822e181432b7e15ac481d7e46da0b22e2e1070da6763218641\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + " Building wheel for llama-cpp-python (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.6-cp311-cp311-linux_x86_64.whl size=4022116 sha256=7fdfa0bf98f5cf71204e7497d168a59fa164e54a9c7adecfd4bef61dd1277b44\n", + " Stored in directory: /root/.cache/pip/wheels/e8/96/d2/acfb576f7a58ef0580e2fec8096e5eefd17cc356017089337b\n", + "Successfully built structured-qa fire llama-cpp-python\n", + "Installing collected packages: xxhash, watchdog, shtab, pymupdf, protobuf, loguru, hf_transfer, fsspec, fire, diskcache, dill, pymupdf4llm, pydeck, multiprocess, llama-cpp-python, tyro, xformers, datasets, cut_cross_entropy, bitsandbytes, trl, streamlit, unsloth_zoo, unsloth, structured-qa\n", + " Attempting uninstall: protobuf\n", + " Found existing installation: protobuf 4.25.5\n", + " Uninstalling protobuf-4.25.5:\n", + " Successfully uninstalled protobuf-4.25.5\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2024.10.0\n", + " Uninstalling fsspec-2024.10.0:\n", + " Successfully uninstalled fsspec-2024.10.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "grpcio-status 1.62.3 requires protobuf>=4.21.6, but you have protobuf 3.20.3 which is incompatible.\n", + "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\n", + "tensorflow-metadata 1.16.1 requires protobuf<6.0.0dev,>=4.25.2; python_version >= \"3.11\", but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed bitsandbytes-0.45.0 cut_cross_entropy-25.1.1 datasets-3.2.0 dill-0.3.8 diskcache-5.6.3 fire-0.7.0 fsspec-2024.9.0 hf_transfer-0.1.9 llama-cpp-python-0.3.6 loguru-0.7.3 multiprocess-0.70.16 protobuf-3.20.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 shtab-1.7.1 streamlit-1.41.1 structured-qa-0.3.3.dev56+gd125b79 trl-0.13.0 tyro-0.9.12 unsloth-2025.1.6 unsloth_zoo-2025.1.5 watchdog-6.0.0 xformers-0.0.29.post1 xxhash-3.5.0\n" + ] + }, + { + "data": { + "application/vnd.colab-display-data+json": { + "id": "6b04f797f18f4c54948485ed45a8dacd", + "pip_warning": { + "packages": [ + "google" + ] + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S22kTrfPlyhU", + "outputId": "f77e1fb9-837a-4674-85f4-bc4e54815ed0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2025-01-23 10:00:12-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 14711 (14K) [text/plain]\n", + "Saving to: ‘structured_qa.csv’\n", + "\n", + "structured_qa.csv 100%[===================>] 14.37K --.-KB/s in 0.003s \n", + "\n", + "2025-01-23 10:00:19 (5.28 MB/s) - ‘structured_qa.csv’ saved [14711/14711]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "\n", + "GEMINI_API_KEY = None\n", + "if not GEMINI_API_KEY:\n", + " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", + "genai.configure(api_key=GEMINI_API_KEY)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "6t5RZiODlyhW" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + " import json\n", + " import time\n", + "\n", + " from loguru import logger\n", + "\n", + "\n", + " def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + " ):\n", + " logger.info(\"Uploading file\")\n", + " file = genai.upload_file(document_file, mime_type=\"application/pdf\")\n", + " while file.state.name == \"PROCESSING\":\n", + " logger.debug(\"Waiting for file to be processed.\")\n", + " time.sleep(2)\n", + " file = genai.get_file(file.name)\n", + "\n", + " logger.info(\"Predicting\")\n", + " n = 
0\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " if n > 0 and n % 9 == 0:\n", + " logger.info(\"Waiting for 60 seconds\")\n", + " time.sleep(60)\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " response = model.model.generate_content([file, question])\n", + " logger.info(response.text)\n", + " response_json = json.loads(response.text)\n", + " answers[index] = response_json[\"answer\"]\n", + " sections[index] = response_json[\"section\"]\n", + " n += 1\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TpP0aRX_lyhY" + }, + "source": [ + "## Function to Download Document" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "UCgjvJ3WlyhY" + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "\n", + "def download_document(url, output_file):\n", + " if not Path(output_file).exists():\n", + " urlretrieve(url, output_file)\n", + " logger.debug(f\"Downloaded {url} to {output_file}\")\n", + " else:\n", + " logger.debug(f\"File {output_file} already exists\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "whtSJwdrlyhZ" + }, + "outputs": [], + "source": [ + "FULL_CONTEXT_PROMPT = \"\"\"\n", + "You are given an input document and a question.\n", + "You can only answer the question based on the information in the document.\n", + "You will return a JSON name with two keys: \"section\" and \"answer\".\n", + "In `\"section\"`, you will return the name of the section where you found the answer.\n", + "In `\"answer\"`, you will return the answer one of the following JSON:\n", + "- Yes/No (for boolean questions)\n", + "Is the model an LLM?\n", + "{\n", + " \"section\": \"1. 
Introduction\",\n", + " \"answer\": \"No\"\n", + "}\n", + "- Single number (for numeric questions)\n", + "How many layers does the model have?\n", + "{\n", + " \"section\": \"2. Architecture\",\n", + " \"answer\": 12\n", + "}\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model?\n", + "-A: ReLU\n", + "-B: Sigmoid\n", + "-C: Tanh\n", + "{\n", + " \"section\": \"2. Architecture\",\n", + " \"answer\": \"C\"\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "ObsvwlNslyhZ" + }, + "outputs": [], + "source": [ + "model = load_gemini_model(\n", + " \"gemini-2.0-flash-exp\",\n", + " system_prompt=FULL_CONTEXT_PROMPT,\n", + " generation_config={\n", + " \"response_mime_type\": \"application/json\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W97jWzzOlyhZ" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "AZBwRnfjlyhZ", + "outputId": "1cea7c23-9edf-45dc-d58d-09ca5b41d193" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-01-23 10:03:52.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:52.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:52.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:56.426\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:56.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:03.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3 Model Architecture\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:03.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:10.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:10.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:14.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:14.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:21.194\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.2.2 Multi-Head Attention\",\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:21.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:28.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.4 Embeddings and Softmax\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:28.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:35.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"6.2 Model Variations\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:35.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:42.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.2 Hardware and Schedule\",\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:42.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:47.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.2 Hardware and Schedule\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:47.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:51.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.3 Optimizer\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:51.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:05:51.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-01-23 10:05:55.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.3 Optimizer\",\n", + " \"answer\": 4000\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:05:55.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:00.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
Training\",\n", + " \"answer\": \"0.1\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:00.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685v2.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:01.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:04.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:04.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:14.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. OUR METHOD\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:14.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How much memory is saved (in GB) when training GPT-3 175B with LoRA compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:22.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. 
Practical Benefits and Limitations.\",\n", + " \"answer\": 0.85\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:22.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:30.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"ABSTRACT\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:30.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:42.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"1. 
INTRODUCTION\",\n", + "\"answer\": \"175\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:42.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:52.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4.1 Low-Rank-Parametrized Update Matrices\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:52.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:53.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:56.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:56.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:04.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3 Arithmetic Reasoning\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:04.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many large language models 
were evaluated?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:13.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"3.1 Experimental Setup\",\n", + "\"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:13.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:21.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"3.1 Experimental Setup\",\n", + " \"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:21.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:31.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"5 Symbolic Reasoning\",\n", + "\"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:31.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:41.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"3.4 Robustness of Chain of Thought\",\n", + "\"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:41.105\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many random samples for examined to understand model errors?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:50.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.2 Results\",\n", + " \"answer\": \"50\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:50.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:01.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5 Symbolic Reasoning\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:01.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:01.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:05.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:05.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:09.436\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Experimental Results\",\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:09.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the model use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:13.038\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2. Decision Tree Analysis of Neural Networks\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:13.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parameters are in the y = x^2 toy model tree?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:16.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Table 1. Computation and memory analysis of toy problems.\",\n", + " \"answer\": \"39\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:16.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:20.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2.4. 
Recurrent Networks\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:20.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:23.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Experimental Results\",\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:23.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:27.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. 
Conclusion\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:27.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:28.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:32.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:32.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:44.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1. Introduction\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:44.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:58.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:58.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is the term of office for members of the advisory board of the Authors Alliance? -A: The source does not specify a term of office for the advisory board. -B: 2 years -C: 4 years\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:09.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"5. Acknowledgements\",\n", + "\"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:09.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:23.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1. Introduction\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:23.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:34.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:34.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In what year did the Bull and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:49.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Open Access Policies\",\n", + " \"answer\": 2015\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:49.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:01.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Chapter 5\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:01.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:02.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:06.198\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:20.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2. SANITARY HOT WATER INSTALLATIONS\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:20.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:37.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1.2.8. GAS DETECTION AND VENTING\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:37.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the daylight factor required for façades with exterior obstructions?\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:53.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. VISUAL COMFORT\",\n", + " \"answer\": \"0.7%\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:53.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:07.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1.1.3. OCCUPATIONAL SAFETY\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:07.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:11.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:15.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:15.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of AI systems are banned by the AI Act? -A: High-risk systems, -B: Manipulative systems, -C: Real-time biometric systems in public spaces\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:43.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 5\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:43.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? 
-A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:06.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 10\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:06.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:26.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 51\",\n", + "\"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:26.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:44.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2. 
(29)\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:44.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:02.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 73\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:02.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:20.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 60\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:20.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. 
-B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:37.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 99\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:37.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:08.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 56\",\n", + "\"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:08.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:30.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 79\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:30.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:15:30.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-23 10:15:51.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 18\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:15:51.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:09.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 65\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:09.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:15.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:18.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:18.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:28.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"OVERVIEW AND GOAL\",\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:28.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:38.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"END OF THE GAME\",\n", + " \"answer\": 3\n", + 
"}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:38.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:45.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. CARD AND TILE EFFECTS\",\n", + " \"answer\": \"6\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:45.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:54.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Turn overview\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:54.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:03.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"CHAPTER OVERVIEW\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:03.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:13.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - 
\u001b[1m{\n", + " \"section\": \"A. Take a Chapter card\",\n", + " \"answer\": \"3\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:13.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:21.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Chapter Overview\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:21.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:29.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"CARD AND TILE COSTS\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:29.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:39.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. 
CARD AND TILE COSTS\",\n", + " \"answer\": 2\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:39.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:39.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:48.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. CARD AND TILE EFFECTS\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:48.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:57.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"CARD AND TILE EFFECTS\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:57.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:04.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
CARD AND TILE EFFECTS\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:04.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:13.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"CONQUERING MIDDLE-EARTH\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:13.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:22.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"END OF THE GAME\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:22.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:30.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"END OF THE GAME\",\n", + " \"answer\": \"7\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:30.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:32.954\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:36.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:36.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:44.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"LOOKOUT PHASE\",\n", + " \"answer\": 4\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:44.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:51.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"LOOKOUT PHASE\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:51.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:58.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"ACTION PHASE\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:58.562\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:05.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"EXPEDITION PHASE\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:05.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:12.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME FLOW\",\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:12.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:19.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME FLOW\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:19.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:27.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"9. 
ACTIONS\",\n", + " \"answer\": \"1\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:27.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:36.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"ACTIONS\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:36.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:43.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME END\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:43.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:21:43.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-23 10:21:50.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME END\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + 
"for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", + " download_document(document_link, downloaded_document)\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 + }, + "id": "EYYJgWf6lyha", + "outputId": "347014fe-d6e6-4d0c-e094-dc5fcbed295e" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25,\n \"min\": 5,\n \"max\": 74,\n \"num_unique_values\": 6,\n \"samples\": [\n 5,\n 13,\n 74\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"3 Model Architecture\",\n \"3 Experimental Results\",\n \"Natural lighting\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Does the final model use learned positional embeddings?\",\n \"How many parameters are in the y = x^2 toy model tree?\",\n \"What is the daylight factor required for fa\\u00e7ades with exterior obstructions?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"14\",\n \"0.7\",\n \"850\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"39\",\n \"0.7%\",\n \"0.85\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"6.2 Model Variations\",\n \"Table 1. Computation and memory analysis of toy problems.\",\n \"4. VISUAL COMFORT\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
55https://arxiv.org/pdf/1706.037623 Model ArchitectureDoes the final model use learned positional em...NOYES6.2 Model Variations
1313https://arxiv.org/pdf/2210.051893 Experimental ResultsHow many parameters are in the y = x^2 toy mod...1439Table 1. Computation and memory analysis of to...
1818https://arxiv.org/pdf/2106.09685v2.pdf5.5 Scaling Up to GPT-3How much memory is saved (in GB) when training...8500.854. Practical Benefits and Limitations.
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Prohibited AI Practices (Article 5)Which type of AI systems are banned by the AI ...CBArticle 5
3939https://authorsalliance.org/wp-content/uploads...Chapter 5 Where do you want to make your work ...Are Gold Open Access and Green Open Access mut...NOYESChapter 5
7474https://commission.europa.eu/document/download...Natural lightingWhat is the daylight factor required for façad...0.70.7%4. VISUAL COMFORT
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Unnamed: 0 document \\\n", + "5 5 https://arxiv.org/pdf/1706.03762 \n", + "13 13 https://arxiv.org/pdf/2210.05189 \n", + "18 18 https://arxiv.org/pdf/2106.09685v2.pdf \n", + "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "39 39 https://authorsalliance.org/wp-content/uploads... \n", + "74 74 https://commission.europa.eu/document/download... \n", + "\n", + " section \\\n", + "5 3 Model Architecture \n", + "13 3 Experimental Results \n", + "18 5.5 Scaling Up to GPT-3 \n", + "22 Prohibited AI Practices (Article 5) \n", + "39 Chapter 5 Where do you want to make your work ... \n", + "74 Natural lighting \n", + "\n", + " question answer pred_answer \\\n", + "5 Does the final model use learned positional em... NO YES \n", + "13 How many parameters are in the y = x^2 toy mod... 14 39 \n", + "18 How much memory is saved (in GB) when training... 850 0.85 \n", + "22 Which type of AI systems are banned by the AI ... C B \n", + "39 Are Gold Open Access and Green Open Access mut... NO YES \n", + "74 What is the daylight factor required for façad... 0.7 0.7% \n", + "\n", + " pred_section \n", + "5 6.2 Model Variations \n", + "13 Table 1. Computation and memory analysis of to... \n", + "18 4. Practical Benefits and Limitations. \n", + "22 Article 5 \n", + "39 Chapter 5 \n", + "74 4. 
VISUAL COMFORT " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wfz1XQDLlyha", + "outputId": "36597dcf-ea15-414d-d66f-a4cb9102c4da" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9210526315789473" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rjMNQp8-sZn9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Run Benchmark" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import google.generativeai as genai\n", - "\n", - "GEMINI_API_KEY = None\n", - "if not GEMINI_API_KEY:\n", - " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", - "genai.configure(api_key=GEMINI_API_KEY)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_gemini_model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Function to Process a single Document" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import time\n", - "\n", - "from loguru import logger\n", - "\n", - "\n", - "def process_document(\n", - " document_file,\n", - " document_data,\n", - " model,\n", - "):\n", - " logger.info(\"Uploading file\")\n", - " file = genai.upload_file(document_file, mime_type=\"application/pdf\")\n", - " while file.state.name == \"PROCESSING\":\n", - " logger.debug(\"Waiting for file to be processed.\")\n", - " time.sleep(2)\n", - " file = genai.get_file(file.name)\n", - "\n", - " logger.info(\"Predicting\")\n", - " n = 0\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in document_data.iterrows():\n", - " if n > 0 and n % 9 == 0:\n", - " logger.info(\"Waiting for 60 seconds\")\n", - " time.sleep(60)\n", - " question = row[\"question\"]\n", - " logger.info(f\"Question: {question}\")\n", - " response = model.model.generate_content([file, question])\n", - " logger.info(response.text)\n", - " response_json = json.loads(response.text)\n", - " answers[index] = response_json[\"answer\"]\n", - " sections[index] = response_json[\"section\"]\n", - " n += 1\n", - " return answers, sections" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": 
[ - "## Function to Download Document" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "\n", - "def download_document(url, output_file):\n", - " if not Path(output_file).exists():\n", - " urlretrieve(url, output_file)\n", - " logger.debug(f\"Downloaded {url} to {output_file}\")\n", - " else:\n", - " logger.debug(f\"File {output_file} already exists\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "FULL_CONTEXT_PROMPT = \"\"\"\n", - "You are given an input document and a question.\n", - "You can only answer the question based on the information in the document.\n", - "You will return a JSON name with two keys: \"section\" and \"answer\".\n", - "In `\"section\"`, you will return the name of the section where you found the answer.\n", - "In `\"answer\"`, you will return the answer one of the following JSON:\n", - "- Yes/No (for boolean questions)\n", - "Is the model an LLM?\n", - "{\n", - " \"section\": \"1. Introduction\",\n", - " \"answer\": \"No\"\n", - "}\n", - "- Single number (for numeric questions)\n", - "How many layers does the model have?\n", - "{\n", - " \"section\": \"2. Architecture\",\n", - " \"answer\": 12\n", - "}\n", - "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model?\n", - "-A: ReLU\n", - "-B: Sigmoid\n", - "-C: Tanh\n", - "{\n", - " \"section\": \"2. 
Architecture\",\n", - " \"answer\": \"C\"\n", - "}\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = load_gemini_model(\n", - " \"gemini-2.0-flash-exp\",\n", - " system_prompt=FULL_CONTEXT_PROMPT,\n", - " generation_config={\n", - " \"response_mime_type\": \"application/json\",\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Run Benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for document_link, document_data in data.groupby(\"document\"):\n", - " logger.info(f\"Downloading document {document_link}\")\n", - " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " download_document(document_link, downloaded_document)\n", - "\n", - " answers, sections = process_document(downloaded_document, document_data, model)\n", - "\n", - " for index in document_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - } - ], - "metadata": { - "kernelspec": { - 
"display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 0 } From 90275670dde1449e2347c208129e6c843d50a205 Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 23 Jan 2025 11:52:48 +0100 Subject: [PATCH 060/120] Add ragatouille --- benchmark/RAGatouille.ipynb | 321 ++++++++++++++++++++++++++++ benchmark/gemini_full_context.ipynb | 74 +++---- 2 files changed, 355 insertions(+), 40 deletions(-) create mode 100644 benchmark/RAGatouille.ipynb diff --git a/benchmark/RAGatouille.ipynb b/benchmark/RAGatouille.ipynb new file mode 100644 index 0000000..205a89e --- /dev/null +++ b/benchmark/RAGatouille.ipynb @@ -0,0 +1,321 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## GPU Check" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, you'll need to enable GPUs for the notebook:\n", + "\n", + "- Navigate to `Edit`→`Notebook Settings`\n", + "- Select T4 GPU from the Hardware Accelerator section\n", + "- Click `Save` and accept.\n", + "\n", + "Next, we'll confirm that we can connect to the GPU:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "if not torch.cuda.is_available():\n", + " raise RuntimeError(\"GPU not available\")\n", + "else:\n", + " print(\"GPU is available!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install ragatouille" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Function to Download Document" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "from loguru import logger\n", + "\n", + "\n", + "def download_document(url, output_file):\n", + " if not Path(output_file).exists():\n", + " urlretrieve(url, output_file)\n", + " logger.info(f\"Downloaded {url} to {output_file}\")\n", + " else:\n", + " logger.info(f\"File {output_file} already exists\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Function to Process a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from ragatouille import RAGPretrainedModel\n", + "\n", + "\n", + 
"ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You only answer based on the current information available.\n", + "You should only answer with ANSWER_TYPE.\n", + "\n", + "The current information available is:\n", + "\n", + "{CURRENT_INFO}\n", + "\n", + "If the current information available not enough to answer the question,\n", + "you must return the following message and nothing else:\n", + "\n", + "```\n", + "I need more info.\n", + "```\n", + "\"\"\"\n", + "\n", + "\n", + "def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + " answer_prompt=ANSWER_WITH_TYPE_PROMPT,\n", + "):\n", + " logger.info(\"Setting up RAG\")\n", + " RAG = RAGPretrainedModel.from_pretrained(\"colbert-ir/colbertv2.0\")\n", + " RAG.index(document_file)\n", + "\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " question = row[\"question\"]\n", + " try:\n", + " float(row[\"answer\"])\n", + " answer_type = \"a number\"\n", + " except ValueError:\n", + " if row[\"answer\"] in (\"YES\", \"NO\"):\n", + " answer_type = \"YES or NO\"\n", + " else:\n", + " answer_type = \"a single letter\"\n", + "\n", + " answer_prompt = answer_prompt.replace(\"ANSWER_TYPE\", answer_type)\n", + "\n", + " logger.info(f\"Question: {question}\")\n", + " logger.info(\"RAG search\")\n", + " results = RAG.search(query=question, k=3)\n", + "\n", + " current_info = \"\\n\".join(result[\"content\"] for result in results)\n", + " messages = [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": answer_prompt.format(CURRENT_INFO=current_info),\n", + " },\n", + " {\"role\": \"user\", \"content\": question},\n", + " ]\n", + " answer = model.get_response(messages)\n", + "\n", + " answers[index] = answer\n", + " sections[index] = None\n", + "\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 
Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_llama_cpp_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = load_llama_cpp_model(\n", + " \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", + " download_document(document_link, downloaded_document)\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" 
+ }, + "language_info": { + "name": "python", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index e26c5d0..d609482 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -327,7 +327,7 @@ "id": "qwHWJEsulyhV" }, "source": [ - "# Run Benchmark" + "# Setup" ] }, { @@ -348,15 +348,31 @@ "genai.configure(api_key=GEMINI_API_KEY)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Function to Download Document" + ] + }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "6t5RZiODlyhW" - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "from structured_qa.model_loaders import load_gemini_model" + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "from loguru import logger\n", + "\n", + "\n", + "def download_document(url, output_file):\n", + " if not Path(output_file).exists():\n", + " urlretrieve(url, output_file)\n", + " logger.info(f\"Downloaded {url} to {output_file}\")\n", + " else:\n", + " logger.info(f\"File {output_file} already exists\")" ] }, { @@ -376,17 +392,15 @@ }, "outputs": [], "source": [ - " import json\n", - " import time\n", - "\n", - " from loguru import logger\n", + "import json\n", + "import time\n", "\n", "\n", - " def process_document(\n", - " document_file,\n", - " document_data,\n", - " model,\n", - " ):\n", + "def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + "):\n", " logger.info(\"Uploading file\")\n", " file = genai.upload_file(document_file, mime_type=\"application/pdf\")\n", " while file.state.name == \"PROCESSING\":\n", @@ -416,39 +430,19 @@ { "cell_type": "markdown", "metadata": { - "id": "TpP0aRX_lyhY" + "id": "VQAof5xtlyhY" }, "source": [ - "## Function to Download Document" + "## Load Model" ] }, { "cell_type": "code", - "execution_count": 6, 
- "metadata": { - "id": "UCgjvJ3WlyhY" - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "\n", - "def download_document(url, output_file):\n", - " if not Path(output_file).exists():\n", - " urlretrieve(url, output_file)\n", - " logger.debug(f\"Downloaded {url} to {output_file}\")\n", - " else:\n", - " logger.debug(f\"File {output_file} already exists\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VQAof5xtlyhY" - }, - "source": [ - "## Load Model" + "from structured_qa.model_loaders import load_gemini_model" ] }, { From d2a3d9879edb81fb4168b004d0bb9fc2141ba357 Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 23 Jan 2025 18:34:44 +0100 Subject: [PATCH 061/120] Fix --- benchmark/RAGatouille.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/RAGatouille.ipynb b/benchmark/RAGatouille.ipynb index 205a89e..20489ed 100644 --- a/benchmark/RAGatouille.ipynb +++ b/benchmark/RAGatouille.ipynb @@ -185,7 +185,7 @@ "):\n", " logger.info(\"Setting up RAG\")\n", " RAG = RAGPretrainedModel.from_pretrained(\"colbert-ir/colbertv2.0\")\n", - " RAG.index(document_file)\n", + " RAG.index([document_file])\n", "\n", " logger.info(\"Predicting\")\n", " answers = {}\n", From 17942ca192e0493c7c061e6f908cc2b945122ef6 Mon Sep 17 00:00:00 2001 From: daavoo Date: Fri, 24 Jan 2025 10:35:20 +0100 Subject: [PATCH 062/120] Update notebooks --- benchmark/gemini_RAGatouille.ipynb | 333 ++++++++++++++++++++++++++++ benchmark/gemini_full_context.ipynb | 39 ++-- 2 files changed, 347 insertions(+), 25 deletions(-) create mode 100644 benchmark/gemini_RAGatouille.ipynb diff --git a/benchmark/gemini_RAGatouille.ipynb b/benchmark/gemini_RAGatouille.ipynb new file mode 100644 index 0000000..c75885a --- /dev/null +++ b/benchmark/gemini_RAGatouille.ipynb @@ -0,0 +1,333 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": 
[ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## GPU Check" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, you'll need to enable GPUs for the notebook:\n", + "\n", + "- Navigate to `Edit`→`Notebook Settings`\n", + "- Select T4 GPU from the Hardware Accelerator section\n", + "- Click `Save` and accept.\n", + "\n", + "Next, we'll confirm that we can connect to the GPU:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "if not torch.cuda.is_available():\n", + " raise RuntimeError(\"GPU not available\")\n", + "else:\n", + " print(\"GPU is available!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install ragatouille" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "\n", + "GEMINI_API_KEY = None\n", + "if not GEMINI_API_KEY:\n", + " 
raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", + "genai.configure(api_key=GEMINI_API_KEY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import PyPDF2\n", + "\n", + "def load_pdf(pdf_file: str) -> str | None:\n", + " try:\n", + " pdf_reader = PyPDF2.PdfReader(pdf_file)\n", + " return \"\\n\".join(page.extract_text() for page in pdf_reader.pages)\n", + " except Exception as e:\n", + " logger.exception(e)\n", + " return None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Function to Process a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from ragatouille import RAGPretrainedModel\n", + "from ragatouille.data import CorpusProcessor\n", + "\n", + "\n", + "def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + "):\n", + " logger.info(\"Setting up RAG\")\n", + " RAG = RAGPretrainedModel.from_pretrained(\"colbert-ir/colbertv2.0\")\n", + " corpus_processor = CorpusProcessor()\n", + " documents = corpus_processor.process_corpus([load_pdf(document_file)])\n", + " RAG.encode([x['content'] for x in documents])\n", + "\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " question = row[\"question\"]\n", + "\n", + " logger.info(f\"Question: {question}\")\n", + " logger.info(\"RAG search\")\n", + " results = RAG.search_encoded_docs(query=question, k=3)\n", + " logger.info(\"RESULTS\")\n", + " logger.info(results)\n", + " current_info = \"\\n\".join(result[\"content\"] for result in results)\n", + " logger.info(current_info)\n", + "\n", + " answer = 
model.model.generate_content([f\"This is the document: {current_info}\"], question)\n", + " logger.info(answer)\n", + " answers[index] = answer\n", + " sections[index] = None\n", + "\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SYSTEM_PROMPT = \"\"\"\n", + "You are given an input document and a question.\n", + "You can only answer the question based on the information in the document.\n", + "You will return a JSON name with two keys: \"section\" and \"answer\".\n", + "In `\"section\"`, you will return the name of the section where you found the answer.\n", + "In `\"answer\"`, you will return the answer one of the following JSON:\n", + "- Yes/No (for boolean questions)\n", + "Is the model an LLM?\n", + "{\n", + " \"section\": \"1. Introduction\",\n", + " \"answer\": \"No\"\n", + "}\n", + "- Single number (for numeric questions)\n", + "How many layers does the model have?\n", + "{\n", + " \"section\": \"2. Architecture\",\n", + " \"answer\": 12\n", + "}\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model?\n", + "-A: ReLU\n", + "-B: Sigmoid\n", + "-C: Tanh\n", + "{\n", + " \"section\": \"2. 
Architecture\",\n", + " \"answer\": \"C\"\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = load_gemini_model(\n", + " \"gemini-2.0-flash-exp\",\n", + " system_prompt=SYSTEM_PROMPT,\n", + " generation_config={\n", + " \"response_mime_type\": \"application/json\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{}, + "outputs": [], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index d609482..d7d0172 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -348,31 +348,13 @@ "genai.configure(api_key=GEMINI_API_KEY)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Function to Download Document" - ] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "from loguru import logger\n", - "\n", - "\n", - "def download_document(url, output_file):\n", - " if not Path(output_file).exists():\n", - " urlretrieve(url, output_file)\n", - " logger.info(f\"Downloaded {url} to {output_file}\")\n", - " else:\n", - " logger.info(f\"File {output_file} already exists\")" + "from loguru import logger" ] }, { @@ -381,7 +363,7 @@ "id": "MKijHC_ClyhX" }, "source": [ - "## Function to Process a single Document" + "## Function to Process all questions for a single Document" ] }, { @@ -396,7 +378,7 @@ "import time\n", "\n", "\n", - "def process_document(\n", + "def process_document_questions(\n", " document_file,\n", " document_data,\n", " model,\n", @@ -453,7 +435,7 @@ }, "outputs": [], "source": [ - "FULL_CONTEXT_PROMPT = \"\"\"\n", + "SYSTEM_PROMPT = \"\"\"\n", "You are given an input document and a question.\n", "You can only answer the question based on the information in the document.\n", "You will return a JSON name with two keys: \"section\" and \"answer\".\n", @@ -493,7 +475,7 @@ "source": [ "model = 
load_gemini_model(\n", " \"gemini-2.0-flash-exp\",\n", - " system_prompt=FULL_CONTEXT_PROMPT,\n", + " system_prompt=SYSTEM_PROMPT,\n", " generation_config={\n", " \"response_mime_type\": \"application/json\",\n", " },\n", @@ -941,6 +923,9 @@ } ], "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", "import pandas as pd\n", "\n", "\n", @@ -952,9 +937,13 @@ "for document_link, document_data in data.groupby(\"document\"):\n", " logger.info(f\"Downloading document {document_link}\")\n", " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " download_document(document_link, downloaded_document)\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", "\n", - " answers, sections = process_document(downloaded_document, document_data, model)\n", + " answers, sections = process_document_questions(downloaded_document, document_data, model)\n", "\n", " for index in document_data.index:\n", " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", From fcdd953b271bb700f4acb4ebb4947aa43ffeeb19 Mon Sep 17 00:00:00 2001 From: daavoo Date: Fri, 24 Jan 2025 13:00:07 +0100 Subject: [PATCH 063/120] Update gemini notebooks --- benchmark/gemini_RAGatouille.ipynb | 1938 ++++++++++++- benchmark/gemini_find_retrieve_answer.ipynb | 296 ++ benchmark/gemini_full_context.ipynb | 2728 ++++++++++--------- 3 files changed, 3534 insertions(+), 1428 deletions(-) create mode 100644 benchmark/gemini_find_retrieve_answer.ipynb diff --git a/benchmark/gemini_RAGatouille.ipynb b/benchmark/gemini_RAGatouille.ipynb index c75885a..672520b 100644 --- a/benchmark/gemini_RAGatouille.ipynb +++ b/benchmark/gemini_RAGatouille.ipynb @@ -2,35 +2,45 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": 
"Fcx4osZYq3mt" + }, "source": [ "# Structured Q&A" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "ZE32hJKeq3mv" + }, "source": [ "Source code: https://github.com/mozilla-ai/structured-qa" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "jDIEL7SNq3mv" + }, "source": [ "Docs: https://mozilla-ai.github.io/structured-qa" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "_OwS4mKRq3mv" + }, "source": [ "## GPU Check" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "_FYZaTmnq3mw" + }, "source": [ "First, you'll need to enable GPUs for the notebook:\n", "\n", @@ -43,9 +53,23 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4RsETkxfq3mw", + "outputId": "172850ad-a72e-434e-9686-9060fa95e660" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GPU is available!\n" + ] + } + ], "source": [ "import torch\n", "\n", @@ -57,55 +81,584 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "yEgVEmSQq3mx" + }, "source": [ "## Installing dependencies" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "P1eAychVq3my", + "outputId": "b152776c-81c3-487a-d804-09ef5fb75258" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting ragatouille\n", + " Downloading ragatouille-0.0.8.post4-py3-none-any.whl.metadata (15 kB)\n", + "Collecting colbert-ai==0.2.19 (from ragatouille)\n", + " Downloading colbert-ai-0.2.19.tar.gz (86 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/86.7 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.7/86.7 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting faiss-cpu<2.0.0,>=1.7.4 (from ragatouille)\n", + " Downloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)\n", + "Collecting fast-pytorch-kmeans==0.2.0.1 (from ragatouille)\n", + " Downloading fast_pytorch_kmeans-0.2.0.1-py3-none-any.whl.metadata (1.1 kB)\n", + "Requirement already satisfied: langchain>=0.1.0 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.14)\n", + "Requirement already satisfied: langchain_core>=0.1.4 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.29)\n", + "Collecting llama-index>=0.7 (from ragatouille)\n", + " Downloading llama_index-0.12.13-py3-none-any.whl.metadata (12 kB)\n", + "Collecting onnx<2.0.0,>=1.15.0 (from ragatouille)\n", + " Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)\n", + "Collecting sentence-transformers<3.0.0,>=2.2.2 (from ragatouille)\n", + " Downloading sentence_transformers-2.7.0-py3-none-any.whl.metadata (11 kB)\n", + "Collecting srsly==2.4.8 (from ragatouille)\n", + " Downloading srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n", + "Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (2.5.1+cu121)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.36.2 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (4.47.1)\n", + "Collecting voyager<3.0.0,>=2.0.2 (from ragatouille)\n", + " Downloading voyager-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.9 kB)\n", + "Collecting bitarray (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading 
bitarray-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (32 kB)\n", + "Collecting datasets (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: flask in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (3.1.0)\n", + "Collecting git-python (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading git_python-1.0.3-py2.py3-none-any.whl.metadata (331 bytes)\n", + "Collecting python-dotenv (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", + "Collecting ninja (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.3 kB)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (1.13.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (4.67.1)\n", + "Collecting ujson (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from fast-pytorch-kmeans==0.2.0.1->ragatouille) (1.26.4)\n", + "Collecting pynvml (from fast-pytorch-kmeans==0.2.0.1->ragatouille)\n", + " Downloading pynvml-12.0.0-py3-none-any.whl.metadata (5.4 kB)\n", + "Requirement already satisfied: catalogue<2.1.0,>=2.0.3 in /usr/local/lib/python3.11/dist-packages (from srsly==2.4.8->ragatouille) (2.0.10)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from faiss-cpu<2.0.0,>=1.7.4->ragatouille) (24.2)\n", + "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (6.0.2)\n", + "Requirement 
already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.0.37)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (3.11.11)\n", + "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.3.5)\n", + "Requirement already satisfied: langsmith<0.3,>=0.1.17 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.2.10)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.10.5)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.32.3)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (9.0.0)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from langchain_core>=0.1.4->ragatouille) (1.33)\n", + "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.11/dist-packages (from langchain_core>=0.1.4->ragatouille) (4.12.2)\n", + "Collecting llama-index-agent-openai<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_agent_openai-0.4.2-py3-none-any.whl.metadata (727 bytes)\n", + "Collecting llama-index-cli<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_cli-0.4.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Collecting llama-index-core<0.13.0,>=0.12.13 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_core-0.12.13-py3-none-any.whl.metadata (2.5 kB)\n", + "Collecting llama-index-embeddings-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading 
llama_index_embeddings_openai-0.3.1-py3-none-any.whl.metadata (684 bytes)\n", + "Collecting llama-index-indices-managed-llama-cloud>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl.metadata (3.6 kB)\n", + "Collecting llama-index-llms-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_llms_openai-0.3.14-py3-none-any.whl.metadata (3.3 kB)\n", + "Collecting llama-index-multi-modal-llms-openai<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_multi_modal_llms_openai-0.4.2-py3-none-any.whl.metadata (726 bytes)\n", + "Collecting llama-index-program-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_program_openai-0.3.1-py3-none-any.whl.metadata (764 bytes)\n", + "Collecting llama-index-question-gen-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_question_gen_openai-0.3.0-py3-none-any.whl.metadata (783 bytes)\n", + "Collecting llama-index-readers-file<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_readers_file-0.4.4-py3-none-any.whl.metadata (5.4 kB)\n", + "Collecting llama-index-readers-llama-parse>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_readers_llama_parse-0.4.0-py3-none-any.whl.metadata (3.6 kB)\n", + "Requirement already satisfied: nltk>3.8.1 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (3.9.1)\n", + "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.11/dist-packages (from onnx<2.0.0,>=1.15.0->ragatouille) (4.25.5)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (1.6.0)\n", + "Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.11/dist-packages (from 
sentence-transformers<3.0.0,>=2.2.2->ragatouille) (0.27.1)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (11.1.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.16.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.5)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2024.10.0)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (11.4.5.107)\n", + "Requirement 
already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", + "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (1.13.1)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.13->ragatouille) (12.6.85)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=1.13->ragatouille) (1.3.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (2024.11.6)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.21.0)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.5.2)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from 
aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.18.3)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain_core>=0.1.4->ragatouille) (3.0.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (0.28.1)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (3.10.14)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.0)\n", + "Requirement already satisfied: openai>=1.14.0 in /usr/local/lib/python3.11/dist-packages (from llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.59.6)\n", + "Collecting dataclasses-json (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", + " Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n", + "Requirement already satisfied: deprecated>=1.2.9.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille) (1.2.15)\n", + "Collecting 
dirtyjson<2.0.0,>=1.0.8 (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", + " Downloading dirtyjson-1.0.8-py3-none-any.whl.metadata (11 kB)\n", + "Collecting filetype<2.0.0,>=1.2.0 (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", + " Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)\n", + "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille) (1.6.0)\n", + "Collecting tiktoken>=0.3.3 (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", + " Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", + "Collecting typing-inspect>=0.8.0 (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", + " Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille) (1.17.0)\n", + "Collecting llama-cloud<0.2.0,>=0.1.8 (from llama-index-indices-managed-llama-cloud>=0.4.0->llama-index>=0.7->ragatouille)\n", + " Downloading llama_cloud-0.1.10-py3-none-any.whl.metadata (912 bytes)\n", + "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (4.12.3)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.2.2)\n", + "Collecting pypdf<6.0.0,>=5.1.0 (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille)\n", + " Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)\n", + "Collecting striprtf<0.0.27,>=0.0.26 (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille)\n", + 
" Downloading striprtf-0.0.26-py3-none-any.whl.metadata (2.1 kB)\n", + "Collecting llama-parse>=0.5.0 (from llama-index-readers-llama-parse>=0.4.0->llama-index>=0.7->ragatouille)\n", + " Downloading llama_parse-0.5.20-py3-none-any.whl.metadata (6.9 kB)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (8.1.8)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (1.4.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (2.27.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2024.12.14)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from SQLAlchemy<3,>=1.4->langchain>=0.1.0->ragatouille) (3.1.1)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets->colbert-ai==0.2.19->ragatouille) (17.0.0)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets->colbert-ai==0.2.19->ragatouille)\n", + " Downloading dill-0.3.8-py3-none-any.whl.metadata 
(10 kB)\n", + "Collecting xxhash (from datasets->colbert-ai==0.2.19->ragatouille)\n", + " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", + "Collecting multiprocess<0.70.17 (from datasets->colbert-ai==0.2.19->ragatouille)\n", + " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", + "Collecting fsspec (from torch>=1.13->ragatouille)\n", + " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: Werkzeug>=3.1 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (3.1.3)\n", + "Requirement already satisfied: itsdangerous>=2.2 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (2.2.0)\n", + "Requirement already satisfied: blinker>=1.9 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (1.9.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=1.13->ragatouille) (3.0.2)\n", + "Requirement already satisfied: gitpython in /usr/local/lib/python3.11/dist-packages (from git-python->colbert-ai==0.2.19->ragatouille) (3.1.44)\n", + "Collecting nvidia-ml-py<13.0.0a0,>=12.0.0 (from pynvml->fast-pytorch-kmeans==0.2.0.1->ragatouille)\n", + " Downloading nvidia_ml_py-12.560.30-py3-none-any.whl.metadata (8.6 kB)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->sentence-transformers<3.0.0,>=2.2.2->ragatouille) (3.5.0)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.11/dist-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.6)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (3.7.1)\n", + "Requirement 
already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (0.14.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.9.0)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (0.8.2)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.3.1)\n", + "Collecting mypy-extensions>=0.3.0 (from typing-inspect>=0.8.0->llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)\n", + "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", + " Downloading marshmallow-3.26.0-py3-none-any.whl.metadata (7.3 kB)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython->git-python->colbert-ai==0.2.19->ragatouille) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in 
/usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2024.2)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython->git-python->colbert-ai==0.2.19->ragatouille) (5.0.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.17.0)\n", + "Downloading ragatouille-0.0.8.post4-py3-none-any.whl (41 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fast_pytorch_kmeans-0.2.0.1-py3-none-any.whl (8.8 kB)\n", + "Downloading srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (490 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m490.9/490.9 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.5/27.5 MB\u001b[0m \u001b[31m78.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_index-0.12.13-py3-none-any.whl (6.9 kB)\n", + "Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.0/16.0 MB\u001b[0m \u001b[31m106.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m18.9 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading voyager-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_index_agent_openai-0.4.2-py3-none-any.whl (13 kB)\n", + "Downloading llama_index_cli-0.4.0-py3-none-any.whl (27 kB)\n", + "Downloading llama_index_core-0.12.13-py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m85.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl (6.2 kB)\n", + "Downloading llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl (13 kB)\n", + "Downloading llama_index_llms_openai-0.3.14-py3-none-any.whl (14 kB)\n", + "Downloading llama_index_multi_modal_llms_openai-0.4.2-py3-none-any.whl (5.9 kB)\n", + "Downloading llama_index_program_openai-0.3.1-py3-none-any.whl (5.3 kB)\n", + "Downloading llama_index_question_gen_openai-0.3.0-py3-none-any.whl (2.9 kB)\n", + "Downloading llama_index_readers_file-0.4.4-py3-none-any.whl (39 kB)\n", + "Downloading llama_index_readers_llama_parse-0.4.0-py3-none-any.whl (2.5 kB)\n", + "Downloading bitarray-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (286 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m286.1/286.1 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m40.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading git_python-1.0.3-py2.py3-none-any.whl (1.9 kB)\n", + "Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (422 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m422.9/422.9 kB\u001b[0m \u001b[31m40.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pynvml-12.0.0-py3-none-any.whl (26 kB)\n", + "Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", + "Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dirtyjson-1.0.8-py3-none-any.whl (25 kB)\n", + "Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n", + "Downloading llama_cloud-0.1.10-py3-none-any.whl (247 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m248.0/248.0 kB\u001b[0m \u001b[31m23.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_parse-0.5.20-py3-none-any.whl (16 kB)\n", + "Downloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_ml_py-12.560.30-py3-none-any.whl (40 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.5/40.5 
kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pypdf-5.1.0-py3-none-any.whl (297 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.0/298.0 kB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading striprtf-0.0.26-py3-none-any.whl (6.9 kB)\n", + "Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m69.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n", + "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m20.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading marshmallow-3.26.0-py3-none-any.whl (50 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Building wheels for collected packages: colbert-ai\n", + " Building wheel for colbert-ai (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for colbert-ai: filename=colbert_ai-0.2.19-py3-none-any.whl size=114759 sha256=338c5f895f655f35f3dbcc0a7a946dacaa589dd2f089863452f6a6160a178d08\n", + " Stored in directory: /root/.cache/pip/wheels/14/75/5f/9680ae93eb0258ccf3e9d8cd34f328c53f8888c06c37067f3a\n", + "Successfully built colbert-ai\n", + "Installing collected packages: striprtf, nvidia-ml-py, filetype, dirtyjson, bitarray, xxhash, voyager, ujson, srsly, python-dotenv, pypdf, pynvml, onnx, ninja, mypy-extensions, marshmallow, fsspec, faiss-cpu, dill, typing-inspect, tiktoken, multiprocess, llama-cloud, git-python, dataclasses-json, llama-index-core, fast-pytorch-kmeans, datasets, sentence-transformers, llama-parse, llama-index-readers-file, llama-index-llms-openai, llama-index-indices-managed-llama-cloud, llama-index-embeddings-openai, colbert-ai, llama-index-readers-llama-parse, llama-index-multi-modal-llms-openai, llama-index-cli, llama-index-agent-openai, llama-index-program-openai, llama-index-question-gen-openai, llama-index, ragatouille\n", + " Attempting uninstall: srsly\n", + " Found existing installation: srsly 2.5.0\n", + " Uninstalling srsly-2.5.0:\n", + " Successfully uninstalled srsly-2.5.0\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2024.10.0\n", + " Uninstalling fsspec-2024.10.0:\n", + " Successfully uninstalled fsspec-2024.10.0\n", + " Attempting uninstall: sentence-transformers\n", + " Found existing installation: sentence-transformers 3.3.1\n", + " Uninstalling sentence-transformers-3.3.1:\n", + " Successfully uninstalled sentence-transformers-3.3.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed bitarray-3.0.0 colbert-ai-0.2.19 dataclasses-json-0.6.7 datasets-3.2.0 dill-0.3.8 dirtyjson-1.0.8 faiss-cpu-1.9.0.post1 fast-pytorch-kmeans-0.2.0.1 filetype-1.2.0 fsspec-2024.9.0 git-python-1.0.3 llama-cloud-0.1.10 llama-index-0.12.13 llama-index-agent-openai-0.4.2 llama-index-cli-0.4.0 llama-index-core-0.12.13 llama-index-embeddings-openai-0.3.1 llama-index-indices-managed-llama-cloud-0.6.4 llama-index-llms-openai-0.3.14 llama-index-multi-modal-llms-openai-0.4.2 llama-index-program-openai-0.3.1 llama-index-question-gen-openai-0.3.0 llama-index-readers-file-0.4.4 llama-index-readers-llama-parse-0.4.0 llama-parse-0.5.20 marshmallow-3.26.0 multiprocess-0.70.16 mypy-extensions-1.0.0 ninja-1.11.1.3 nvidia-ml-py-12.560.30 onnx-1.17.0 pynvml-12.0.0 pypdf-5.1.0 python-dotenv-1.0.1 ragatouille-0.0.8.post4 sentence-transformers-2.7.0 srsly-2.4.8 striprtf-0.0.26 tiktoken-0.8.0 typing-inspect-0.9.0 ujson-5.10.0 voyager-2.1.0 xxhash-3.5.0\n" + ] + } + ], "source": [ - "%pip install ragatouille" + "%pip install ragatouille PyPDF2" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "I0dl5xGnq3my", + "outputId": "68c881e3-6208-4748-f71b-f5a52b787108" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-49ruike5\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-49ruike5\n", + " Running command 
git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", + " Switched to a new branch '5-add-benchmark'\n", + " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 17942ca192e0493c7c061e6f908cc2b945122ef6\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting fire (from structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading fire-0.7.0.tar.gz (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev62+g17942ca) (0.27.1)\n", + "Collecting llama-cpp-python (from structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading llama_cpp_python-0.3.6.tar.gz (66.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 MB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting loguru (from structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev62+g17942ca) (2.10.5)\n", + "Collecting pymupdf4llm (from structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev62+g17942ca) (6.0.2)\n", + "Collecting streamlit (from structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", + "Collecting unsloth (from structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading unsloth-2025.1.6-py3-none-any.whl.metadata (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.7/53.7 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev62+g17942ca) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (2024.9.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (4.12.2)\n", + "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev62+g17942ca) (1.26.4)\n", + "Collecting diskcache>=5.6.1 (from llama-cpp-python->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev62+g17942ca) (3.1.5)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev62+g17942ca) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev62+g17942ca) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (5.5.0)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (8.1.8)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (4.25.5)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (6.3.3)\n", + "Collecting unsloth_zoo>=2025.1.4 (from 
unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading unsloth_zoo-2025.1.5-py3-none-any.whl.metadata (16 kB)\n", + "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (2.5.1+cu121)\n", + "Collecting xformers>=0.0.27.post2 (from unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n", + "Collecting bitsandbytes (from unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)\n", + "Requirement already satisfied: triton>=3.0.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (3.1.0)\n", + "Collecting tyro (from unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading tyro-0.9.13-py3-none-any.whl.metadata (9.4 kB)\n", + "Requirement already satisfied: transformers!=4.47.0,>=4.46.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (4.47.1)\n", + "Requirement already satisfied: datasets>=2.16.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (3.2.0)\n", + "Requirement already satisfied: sentencepiece>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.2.0)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (5.9.5)\n", + "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.45.1)\n", + "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.2.1)\n", + "Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from 
unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading trl-0.13.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.14.0)\n", + "Collecting protobuf<6,>=3.20 (from streamlit->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)\n", + "Collecting hf_transfer (from unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from accelerate>=0.34.1->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.5.2)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (1.21.1)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.3.8)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (3.5.0)\n", + "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.70.16)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (3.11.11)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from 
gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev62+g17942ca) (4.0.12)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2>=2.11.3->llama-cpp-python->structured-qa==0.3.3.dev62+g17942ca) (3.0.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (2024.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (2.18.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from 
torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (3.4.2)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (2.21.5)\n", + "Requirement already satisfied: 
nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.105)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.13.1)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.6.85)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.3.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev62+g17942ca) (2024.11.6)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.21.0)\n", + "Collecting cut_cross_entropy (from unsloth_zoo>=2025.1.4->unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)\n", + "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.16)\n", + "Collecting shtab>=1.5.6 (from tyro->unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", + " Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n", + "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev62+g17942ca) (4.4.1)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (2.4.4)\n", + "Requirement 
already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.18.3)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev62+g17942ca) (5.0.2)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages 
(from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (1.17.0)\n", + "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", + "Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m117.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading unsloth-2025.1.6-py3-none-any.whl (175 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m175.5/175.5 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.1/162.1 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m114.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m99.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading trl-0.13.0-py3-none-any.whl (293 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.4/293.4 kB\u001b[0m \u001b[31m23.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading unsloth_zoo-2025.1.5-py3-none-any.whl (80 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.2/80.2 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl (15.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.3/15.3 MB\u001b[0m \u001b[31m101.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.7/69.7 MB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m108.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tyro-0.9.13-py3-none-any.whl (115 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.7/115.7 kB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\n", + "Downloading 
cut_cross_entropy-25.1.1-py3-none-any.whl (22 kB)\n", + "Building wheels for collected packages: structured-qa, fire, llama-cpp-python\n", + " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev62+g17942ca-py3-none-any.whl size=16254 sha256=4a483dde13b83e4423b427dc48638180c87ff1dca8cb35d4a09006ef2ca537d7\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-za51p1on/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", + " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=d9cddb798cd09136c441440f7a103ae7e5879184815ed603d5d49bd8a9e39570\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + " Building wheel for llama-cpp-python (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.6-cp311-cp311-linux_x86_64.whl size=4070578 sha256=6aa44ca69ab5b970dc8498c4f69366acbfa247fd7a03849742858bf7ca77d063\n", + " Stored in directory: /root/.cache/pip/wheels/e8/96/d2/acfb576f7a58ef0580e2fec8096e5eefd17cc356017089337b\n", + "Successfully built structured-qa fire llama-cpp-python\n", + "Installing collected packages: watchdog, shtab, pymupdf, protobuf, loguru, hf_transfer, fire, diskcache, pymupdf4llm, pydeck, llama-cpp-python, tyro, xformers, cut_cross_entropy, bitsandbytes, trl, streamlit, unsloth_zoo, unsloth, structured-qa\n", + " Attempting uninstall: protobuf\n", + " Found existing installation: protobuf 4.25.5\n", + " Uninstalling protobuf-4.25.5:\n", + " Successfully uninstalled protobuf-4.25.5\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "grpcio-status 1.62.3 requires protobuf>=4.21.6, but you have protobuf 3.20.3 which is incompatible.\n", + "tensorflow-metadata 1.16.1 requires protobuf<6.0.0dev,>=4.25.2; python_version >= \"3.11\", but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed bitsandbytes-0.45.1 cut_cross_entropy-25.1.1 diskcache-5.6.3 fire-0.7.0 hf_transfer-0.1.9 llama-cpp-python-0.3.6 loguru-0.7.3 protobuf-3.20.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 shtab-1.7.1 streamlit-1.41.1 structured-qa-0.3.3.dev62+g17942ca trl-0.13.0 tyro-0.9.13 unsloth-2025.1.6 unsloth_zoo-2025.1.5 watchdog-6.0.0 xformers-0.0.29.post1\n" + ] + }, + { + "data": { + "application/vnd.colab-display-data+json": { + "id": "a56f4501de384e0e8c5cf504d1337657", + "pip_warning": { + "packages": [ + "google" + ] + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Nl_haxghq3mz", + "outputId": "18bc7cbf-feaa-481d-9d84-6c8390ff258d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2025-01-24 10:32:07-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.109.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 14711 (14K) [text/plain]\n", + "Saving to: ‘structured_qa.csv’\n", + "\n", + "\rstructured_qa.csv 0%[ ] 0 --.-KB/s \rstructured_qa.csv 100%[===================>] 14.37K --.-KB/s in 0s \n", + "\n", + "2025-01-24 10:32:07 (73.1 MB/s) - ‘structured_qa.csv’ saved [14711/14711]\n", + "\n" + ] + } + ], "source": [ "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "ZdWx_e7iq3mz" + }, "source": [ "# Setup" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "id": "vGqX_bU5q3mz" + }, "outputs": [], "source": [ "import os\n", "import google.generativeai as genai\n", "\n", - "GEMINI_API_KEY = None\n", + "GEMINI_API_KEY = \"AIzaSyDxFKx8bdm3sAhQsy04jy2OPNl_tLZiKlY\"\n", "if not GEMINI_API_KEY:\n", " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", @@ -114,8 +667,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "id": "cbkIjBYNq3mz" + }, "outputs": [], "source": [ "from loguru import logger" @@ -123,12 +678,15 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "id": "BiUeBWnIq3mz" + }, "outputs": [], "source": [ "import PyPDF2\n", "\n", + "\n", "def load_pdf(pdf_file: str) -> str | None:\n", " try:\n", " pdf_reader = PyPDF2.PdfReader(pdf_file)\n", @@ -142,15 +700,20 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Function to Process a single Document" + "## Function to Process all questions for a single Document" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 47, + "metadata": { + "id": "Ilxn8LGFq3m0" + }, "outputs": [], "source": [ + "import json\n", + "import time\n", + "\n", "from 
ragatouille import RAGPretrainedModel\n", "from ragatouille.data import CorpusProcessor\n", "\n", @@ -164,41 +727,48 @@ " RAG = RAGPretrainedModel.from_pretrained(\"colbert-ir/colbertv2.0\")\n", " corpus_processor = CorpusProcessor()\n", " documents = corpus_processor.process_corpus([load_pdf(document_file)])\n", - " RAG.encode([x['content'] for x in documents])\n", + " RAG.encode([x[\"content\"] for x in documents])\n", "\n", " logger.info(\"Predicting\")\n", " answers = {}\n", " sections = {}\n", " for index, row in document_data.iterrows():\n", + " if model.n > 0 and model.n % 9 == 0:\n", + " logger.info(\"Waiting for 60 seconds\")\n", + " time.sleep(60)\n", " question = row[\"question\"]\n", "\n", " logger.info(f\"Question: {question}\")\n", - " logger.info(\"RAG search\")\n", " results = RAG.search_encoded_docs(query=question, k=3)\n", - " logger.info(\"RESULTS\")\n", - " logger.info(results)\n", " current_info = \"\\n\".join(result[\"content\"] for result in results)\n", - " logger.info(current_info)\n", + " logger.info(current_info[:100])\n", "\n", - " answer = model.model.generate_content([f\"This is the document: {current_info}\"], question)\n", - " logger.info(answer)\n", - " answers[index] = answer\n", + " answer = model.model.generate_content(\n", + " [f\"This is the document: {current_info}\", question]\n", + " )\n", + " logger.info(answer.text)\n", + " answers[index] = json.loads(answer.text)[\"answer\"]\n", " sections[index] = None\n", + " model.n += 1\n", "\n", " return answers, sections" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "jr3ke2aJq3m0" + }, "source": [ "## Load Model" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 48, + "metadata": { + "id": "zKMHc0Ouq3m0" + }, "outputs": [], "source": [ "from structured_qa.model_loaders import load_gemini_model" @@ -206,26 +776,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 49, 
+ "metadata": { + "id": "cMBl2dxLq3m0" + }, "outputs": [], "source": [ "SYSTEM_PROMPT = \"\"\"\n", "You are given an input document and a question.\n", "You can only answer the question based on the information in the document.\n", - "You will return a JSON name with two keys: \"section\" and \"answer\".\n", - "In `\"section\"`, you will return the name of the section where you found the answer.\n", - "In `\"answer\"`, you will return the answer one of the following JSON:\n", + "You will return a JSON name with a single key: \"answer\".\n", + "In `\"answer\"`, you will return the answer using one of the following JSON types:\n", "- Yes/No (for boolean questions)\n", "Is the model an LLM?\n", "{\n", - " \"section\": \"1. Introduction\",\n", " \"answer\": \"No\"\n", "}\n", "- Single number (for numeric questions)\n", "How many layers does the model have?\n", "{\n", - " \"section\": \"2. Architecture\",\n", " \"answer\": 12\n", "}\n", "- Single letter (for multiple-choice questions)\n", @@ -234,7 +803,6 @@ "-B: Sigmoid\n", "-C: Tanh\n", "{\n", - " \"section\": \"2. 
Architecture\",\n", " \"answer\": \"C\"\n", "}\n", "\"\"\"" @@ -242,8 +810,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 50, + "metadata": { + "id": "QV3pBXvhq3m0" + }, "outputs": [], "source": [ "model = load_gemini_model(\n", @@ -257,27 +827,907 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "j5jWlVBaq3m1" + }, "source": [ "# Run Benchmark" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 51, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "W9r17Rz3q3m1", + "outputId": "3232af63-09f7-4377-dff2-e8df725c1445" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-01-24 11:10:27.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m8\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-24 11:10:27.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-01-24 11:10:27.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 1706.03762.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-24 11:10:27.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 56 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685v2.pdf\u001b[0m\n", + "\u001b[32m2025-01-24 11:11:45.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 2106.09685v2.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-24 11:11:45.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 137 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-01-24 11:12:00.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 2201.11903.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-24 11:12:00.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 199 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/7 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-01-24 11:13:21.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 2210.05189.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-24 11:13:21.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 44 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-01-24 11:14:30.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-24 11:14:30.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: 
`torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 143 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-01-24 11:14:42.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-24 11:14:42.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 364 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/12 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", + "\u001b[32m2025-01-24 11:15:54.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile ?uri=OJ:L_202401689.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-24 11:15:54.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 754 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/24 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-01-24 11:17:22.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 7DUME_EN01_Rules.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-24 11:17:22.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 17 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/1 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-01-24 11:19:48.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile is_eotn_rulebook.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-24 11:19:48.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 48 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/2 [00:00There is no limit to the number, type, or order of \n", + "actions a player may take during the Action pha\u001b[0m\n", + "\u001b[32m2025-01-24 11:19:55.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-24 11:19:55.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-01-24 11:19:55.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m>Discard any remaining, face-up Island cards and reveal new ones.\n", + " >Pass the First player marker to \u001b[0m\n", + "\u001b[32m2025-01-24 11:19:57.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-24 11:19:57.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-24 11:19:57.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mGAME FLOW\n", + "Note for Imperial Settlers fans \n", + "You cannot Spend 2 Workers \n", + "to get a Resource or a card.\u001b[0m\n", 
+ "\u001b[32m2025-01-24 11:19:58.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-24 11:19:58.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-24 11:20:58.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-24 11:20:58.752\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mIMPORT ANT: Some Field Locations work only as upgrades. These Fields have \n", + "the Resources on the righ\u001b[0m\n", + "\u001b[32m2025-01-24 11:21:00.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", + "\"answer\": 1\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-24 11:21:00.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-24 11:21:00.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mThus allowing a player to play \n", + "a single Boost card or build a single Field Location before resolvin\u001b[0m\n", + "\u001b[32m2025-01-24 11:21:01.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + 
"\u001b[32m2025-01-24 11:21:01.855\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-24 11:21:01.872\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", + "(Gold tokens assigned to cards are\u001b[0m\n", + "\u001b[32m2025-01-24 11:21:07.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-24 11:21:07.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-24 11:21:07.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", + "(Gold tokens assigned to cards are\u001b[0m\n", + "\u001b[32m2025-01-24 11:21:08.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n" + ] + } + ], "source": [ "from pathlib import Path\n", "from urllib.request import urlretrieve\n", "\n", "import pandas as pd\n", "\n", + "model.n = 0\n", + "\n", "logger.info(\"Loading input data\")\n", "data = pd.read_csv(\"structured_qa.csv\")\n", "data[\"pred_answer\"] = [None] * len(data)\n", "data[\"pred_section\"] = [None] * len(data)\n", - "\n", "for document_link, document_data in data.groupby(\"document\"):\n", " logger.info(f\"Downloading document {document_link}\")\n", " downloaded_document = 
Path(f\"{Path(document_link).name}.pdf\")\n", @@ -298,9 +1748,337 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 53, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 262 + }, + "id": "mltqL7Bhq3m1", + "outputId": "54479b50-365a-4f5c-a06e-b6de90b773b6" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 16,\n \"min\": 12,\n \"max\": 50,\n \"num_unique_values\": 4,\n \"samples\": [\n 22,\n 50,\n 12\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Prohibited AI Practices (Article 5)\",\n \"CHAPTER OVERVIEW\",\n \"2.1 Fully Connected Networks\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Which type of AI systems are banned by the AI Act? -A: High-risk systems, -B: Manipulative systems, -C: Real-time biometric systems in public spaces\",\n \"Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\",\n \"Does the model use Sigmoid activation function?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"NO\",\n \"C\",\n \"A\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"B\",\n \"C\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1212https://arxiv.org/pdf/2210.051892.1 Fully Connected NetworksDoes the model use Sigmoid activation function?NOYESNaN
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Prohibited AI Practices (Article 5)Which type of AI systems are banned by the AI ...CBNaN
2424https://eur-lex.europa.eu/legal-content/EN/TXT...Classification rules (article 51)What is the threshold, measured in floating po...CNOT APPLICABLENaN
5050https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWWhich player begins the game? -A: Sauron -B: T...ACNaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Unnamed: 0 document \\\n", + "12 12 https://arxiv.org/pdf/2210.05189 \n", + "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "24 24 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "50 50 https://github.com/mozilla-ai/structured-qa/re... \n", + "\n", + " section \\\n", + "12 2.1 Fully Connected Networks \n", + "22 Prohibited AI Practices (Article 5) \n", + "24 Classification rules (article 51) \n", + "50 CHAPTER OVERVIEW \n", + "\n", + " question answer pred_answer \\\n", + "12 Does the model use Sigmoid activation function? NO YES \n", + "22 Which type of AI systems are banned by the AI ... C B \n", + "24 What is the threshold, measured in floating po... C NOT APPLICABLE \n", + "50 Which player begins the game? -A: Sauron -B: T... A C \n", + "\n", + " pred_section \n", + "12 NaN \n", + "22 NaN \n", + "24 NaN \n", + "50 NaN " + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "results = pd.read_csv(\"results.csv\")\n", "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" @@ -308,19 +2086,49 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 54, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "c4z9XxXWq3m1", + "outputId": "6acb2a06-aaa7-460f-b6cd-6b7bf87aa24e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9473684210526315" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", "accuracy" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UXg_TC7R28QI" + }, + "outputs": [], + "source": [] } ], "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, "kernelspec": { - "display_name": ".venv", - "language": "python", + "display_name": 
"Python 3", "name": "python3" }, "language_info": { @@ -329,5 +2137,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 0 } diff --git a/benchmark/gemini_find_retrieve_answer.ipynb b/benchmark/gemini_find_retrieve_answer.ipynb new file mode 100644 index 0000000..b0fd05c --- /dev/null +++ b/benchmark/gemini_find_retrieve_answer.ipynb @@ -0,0 +1,296 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "\n", + "GEMINI_API_KEY = None\n", + "if not GEMINI_API_KEY:\n", + " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", + "genai.configure(api_key=GEMINI_API_KEY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Function to 
Process a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from structured_qa.config import FIND_PROMPT\n", + "from structured_qa.preprocessing import document_to_sections_dir\n", + "from structured_qa.workflow import find_retrieve_answer\n", + "\n", + "\n", + "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You only answer based on the current information available.\n", + "You should only answer with ANSWER_TYPE.\n", + "\n", + "The current information available is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "If the current information available not enough to answer the question,\n", + "you must return the following message and nothing else:\n", + "\n", + "```\n", + "I need more info.\n", + "```\n", + "\"\"\"\n", + "\n", + "\n", + "def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + " find_prompt: str = FIND_PROMPT,\n", + " answer_prompt: str = ANSWER_WITH_TYPE_PROMPT,\n", + "):\n", + " sections_dir = Path(\"sections\") / Path(document_file).stem\n", + " if not sections_dir.exists():\n", + " logger.info(\"Splitting document into sections\")\n", + " document_to_sections_dir(document_file, sections_dir)\n", + "\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " question = row[\"question\"]\n", + " try:\n", + " float(row[\"answer\"])\n", + " answer_type = \"a number\"\n", + " except ValueError:\n", + " if row[\"answer\"] in (\"YES\", \"NO\"):\n", + " answer_type = \"YES or NO\"\n", + " else:\n", + " answer_type = \"a single letter\"\n", + "\n", + " answer_prompt = answer_prompt.replace(\"ANSWER_TYPE\", answer_type)\n", + "\n", + " logger.info(f\"Question: {question}\")\n", + " answer, sections_checked = find_retrieve_answer(\n", + " question, model, sections_dir, find_prompt, answer_prompt\n", 
+ " )\n", + "\n", + " answers[index] = answer\n", + " sections[index] = sections_checked[-1] if sections_checked else None\n", + "\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SYSTEM_PROMPT = \"\"\"\n", + "You are given an input document and a question.\n", + "You can only answer the question based on the information in the document.\n", + "You will return a JSON name with one keys: \"answer\".\n", + "In `\"answer\"`, you will return the answer one of the following JSON types:\n", + "- Yes/No (for boolean questions)\n", + "Is the model an LLM?\n", + "{\n", + " \"answer\": \"No\"\n", + "}\n", + "- Single number (for numeric questions)\n", + "How many layers does the model have?\n", + "{\n", + " \"answer\": 12\n", + "}\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model?\n", + "-A: ReLU\n", + "-B: Sigmoid\n", + "-C: Tanh\n", + "{\n", + " \"answer\": \"C\"\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = load_gemini_model(\n", + " \"gemini-2.0-flash-exp\",\n", + " system_prompt=SYSTEM_PROMPT,\n", + " generation_config={\n", + " \"response_mime_type\": \"application/json\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "logger.info(\"Loading input 
data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index d7d0172..520f7d2 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -1,1383 +1,1385 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "oD2lVadPlyhR" - }, - "source": [ - "# Structured Q&A" - ] - }, - { - 
"cell_type": "markdown", - "metadata": { - "id": "JZeb4ABvlyhS" - }, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "77aI1i7vlyhS" - }, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "atlPXFshlyhS" - }, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "QrgOGtuGlyhT", - "outputId": "62d28154-d186-4417-b032-6701fd174ecd" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-e3shdxjv\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-e3shdxjv\n", - " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", - " Switched to a new branch '5-add-benchmark'\n", - " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit d125b79bb7bfdeab751f93bac37039950fe24ce5\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting fire (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading fire-0.7.0.tar.gz (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (0.27.1)\n", - "Collecting llama-cpp-python (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading llama_cpp_python-0.3.6.tar.gz (66.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting loguru (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (2.10.5)\n", - "Collecting pymupdf4llm (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (6.0.2)\n", - "Collecting streamlit (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", - "Collecting unsloth (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading unsloth-2025.1.6-py3-none-any.whl.metadata (53 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.7/53.7 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev56+gd125b79) (2.5.0)\n", - "Requirement already satisfied: 
filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.16.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.12.2)\n", - "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (1.26.4)\n", - "Collecting diskcache>=5.6.1 (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n", - "Requirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.1.5)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (2.27.2)\n", - "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", - "Requirement 
already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (8.1.8)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.25.5)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.10.2)\n", - "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.1.44)\n", - "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (6.3.3)\n", - "Collecting unsloth_zoo>=2025.1.4 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading unsloth_zoo-2025.1.5-py3-none-any.whl.metadata (16 kB)\n", - "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.5.1+cu121)\n", - "Collecting xformers>=0.0.27.post2 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n", - "Collecting bitsandbytes (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)\n", - "Requirement already satisfied: triton>=3.0.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.1.0)\n", - "Collecting tyro (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading tyro-0.9.12-py3-none-any.whl.metadata (9.4 kB)\n", - "Requirement already satisfied: transformers!=4.47.0,>=4.46.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.47.1)\n", - "Collecting datasets>=2.16.0 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", - "Requirement already satisfied: sentencepiece>=0.2.0 in 
/usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.0)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (5.9.5)\n", - "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.45.1)\n", - "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.2.1)\n", - "Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading trl-0.13.0-py3-none-any.whl.metadata (11 kB)\n", - "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.14.0)\n", - "Collecting protobuf<6,>=3.20 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)\n", - "Collecting hf_transfer (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n", - "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from accelerate>=0.34.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.5.2)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.21.1)\n", - "Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", - "Collecting xxhash 
(from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", - "Collecting multiprocess<0.70.17 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", - "Collecting fsspec>=2023.5.0 (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.11.11)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.0.12)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2>=2.11.3->llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.0.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.10)\n", - "Requirement already 
satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.18.0)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.4.2)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (9.1.0.70)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.3.1)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) 
(11.0.2.54)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.21.5)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.13.1)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.6.85)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.0)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2024.11.6)\n", - "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.21.0)\n", - "Collecting cut_cross_entropy (from 
unsloth_zoo>=2025.1.4->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)\n", - "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.16)\n", - "Collecting shtab>=1.5.6 (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n", - "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.4.1)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.4.4)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.2)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (24.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.5.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (6.1.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.18.3)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 
in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.0.2)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.35.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.17.0)\n", - "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m288.3 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", - "Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m84.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading unsloth-2025.1.6-py3-none-any.whl (175 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m175.5/175.5 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 
kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.1/162.1 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m89.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m79.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading trl-0.13.0-py3-none-any.whl (293 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.4/293.4 kB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading unsloth_zoo-2025.1.5-py3-none-any.whl (80 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.2/80.2 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", - 
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl (15.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.3/15.3 MB\u001b[0m \u001b[31m86.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.1/69.1 MB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m83.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tyro-0.9.12-py3-none-any.whl (115 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.7/115.7 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\n", - "Downloading cut_cross_entropy-25.1.1-py3-none-any.whl (22 kB)\n", - "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire, llama-cpp-python\n", - " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev56+gd125b79-py3-none-any.whl size=16254 sha256=44ec8b803647a36e38d429c99dca9a41465cabf732b89ee30d835b5bf7ef397a\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-7rr3qwwf/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", - " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=ae95bde960b4f7822e181432b7e15ac481d7e46da0b22e2e1070da6763218641\n", - " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", - " Building wheel for llama-cpp-python (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.6-cp311-cp311-linux_x86_64.whl size=4022116 sha256=7fdfa0bf98f5cf71204e7497d168a59fa164e54a9c7adecfd4bef61dd1277b44\n", - " Stored in directory: /root/.cache/pip/wheels/e8/96/d2/acfb576f7a58ef0580e2fec8096e5eefd17cc356017089337b\n", - "Successfully built structured-qa fire llama-cpp-python\n", - "Installing collected packages: xxhash, watchdog, shtab, pymupdf, protobuf, loguru, hf_transfer, fsspec, fire, diskcache, dill, pymupdf4llm, pydeck, multiprocess, llama-cpp-python, tyro, xformers, datasets, cut_cross_entropy, bitsandbytes, trl, streamlit, unsloth_zoo, unsloth, structured-qa\n", - " Attempting uninstall: protobuf\n", - " Found existing installation: protobuf 4.25.5\n", - " Uninstalling protobuf-4.25.5:\n", - " Successfully uninstalled protobuf-4.25.5\n", - " Attempting uninstall: fsspec\n", - " Found existing installation: fsspec 2024.10.0\n", - " Uninstalling fsspec-2024.10.0:\n", - " Successfully uninstalled fsspec-2024.10.0\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "grpcio-status 1.62.3 requires protobuf>=4.21.6, but you have protobuf 3.20.3 which is incompatible.\n", - "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\n", - "tensorflow-metadata 1.16.1 requires protobuf<6.0.0dev,>=4.25.2; python_version >= \"3.11\", but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed bitsandbytes-0.45.0 cut_cross_entropy-25.1.1 datasets-3.2.0 dill-0.3.8 diskcache-5.6.3 fire-0.7.0 fsspec-2024.9.0 hf_transfer-0.1.9 llama-cpp-python-0.3.6 loguru-0.7.3 multiprocess-0.70.16 protobuf-3.20.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 shtab-1.7.1 streamlit-1.41.1 structured-qa-0.3.3.dev56+gd125b79 trl-0.13.0 tyro-0.9.12 unsloth-2025.1.6 unsloth_zoo-2025.1.5 watchdog-6.0.0 xformers-0.0.29.post1 xxhash-3.5.0\n" - ] - }, - { - "data": { - "application/vnd.colab-display-data+json": { - "id": "6b04f797f18f4c54948485ed45a8dacd", - "pip_warning": { - "packages": [ - "google" - ] - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "S22kTrfPlyhU", - "outputId": "f77e1fb9-837a-4674-85f4-bc4e54815ed0" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2025-01-23 10:00:12-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 14711 (14K) [text/plain]\n", - "Saving to: ‘structured_qa.csv’\n", - "\n", - "structured_qa.csv 100%[===================>] 14.37K --.-KB/s in 0.003s \n", - "\n", - "2025-01-23 10:00:19 (5.28 MB/s) - ‘structured_qa.csv’ saved [14711/14711]\n", - "\n" - ] - } - ], - "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qwHWJEsulyhV" - }, - "source": [ - "# Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "iJ812u2llyhV" - }, - "outputs": [], - "source": [ - "import os\n", - "import google.generativeai as genai\n", - "\n", - "GEMINI_API_KEY = None\n", - "if not GEMINI_API_KEY:\n", - " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", - "genai.configure(api_key=GEMINI_API_KEY)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from loguru import logger" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MKijHC_ClyhX" - }, - "source": [ - "## Function to Process all questions for a single Document" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "oFU-eYMVlyhX" - }, - "outputs": [], - "source": [ - "import json\n", - "import time\n", - "\n", - "\n", - "def process_document_questions(\n", - " document_file,\n", - " document_data,\n", - " model,\n", - "):\n", - " logger.info(\"Uploading file\")\n", - " file = genai.upload_file(document_file, mime_type=\"application/pdf\")\n", - " while file.state.name == \"PROCESSING\":\n", - " logger.debug(\"Waiting for file to be processed.\")\n", - " time.sleep(2)\n", - " file = genai.get_file(file.name)\n", - "\n", - " logger.info(\"Predicting\")\n", - " n = 0\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in 
document_data.iterrows():\n", - " if n > 0 and n % 9 == 0:\n", - " logger.info(\"Waiting for 60 seconds\")\n", - " time.sleep(60)\n", - " question = row[\"question\"]\n", - " logger.info(f\"Question: {question}\")\n", - " response = model.model.generate_content([file, question])\n", - " logger.info(response.text)\n", - " response_json = json.loads(response.text)\n", - " answers[index] = response_json[\"answer\"]\n", - " sections[index] = response_json[\"section\"]\n", - " n += 1\n", - " return answers, sections" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VQAof5xtlyhY" - }, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_gemini_model" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "whtSJwdrlyhZ" - }, - "outputs": [], - "source": [ - "SYSTEM_PROMPT = \"\"\"\n", - "You are given an input document and a question.\n", - "You can only answer the question based on the information in the document.\n", - "You will return a JSON name with two keys: \"section\" and \"answer\".\n", - "In `\"section\"`, you will return the name of the section where you found the answer.\n", - "In `\"answer\"`, you will return the answer one of the following JSON:\n", - "- Yes/No (for boolean questions)\n", - "Is the model an LLM?\n", - "{\n", - " \"section\": \"1. Introduction\",\n", - " \"answer\": \"No\"\n", - "}\n", - "- Single number (for numeric questions)\n", - "How many layers does the model have?\n", - "{\n", - " \"section\": \"2. Architecture\",\n", - " \"answer\": 12\n", - "}\n", - "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model?\n", - "-A: ReLU\n", - "-B: Sigmoid\n", - "-C: Tanh\n", - "{\n", - " \"section\": \"2. 
Architecture\",\n", - " \"answer\": \"C\"\n", - "}\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "ObsvwlNslyhZ" - }, - "outputs": [], - "source": [ - "model = load_gemini_model(\n", - " \"gemini-2.0-flash-exp\",\n", - " system_prompt=SYSTEM_PROMPT,\n", - " generation_config={\n", - " \"response_mime_type\": \"application/json\",\n", - " },\n", - ")" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oD2lVadPlyhR" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JZeb4ABvlyhS" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77aI1i7vlyhS" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "atlPXFshlyhS" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, + "id": "QrgOGtuGlyhT", + "outputId": "62d28154-d186-4417-b032-6701fd174ecd" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "W97jWzzOlyhZ" - }, - "source": [ - "# Run Benchmark" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-e3shdxjv\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-e3shdxjv\n", + " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", + " Switched to a new branch '5-add-benchmark'\n", + " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", + " Resolved 
https://github.com/mozilla-ai/structured-qa.git to commit d125b79bb7bfdeab751f93bac37039950fe24ce5\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting fire (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading fire-0.7.0.tar.gz (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (0.27.1)\n", + "Collecting llama-cpp-python (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading llama_cpp_python-0.3.6.tar.gz (66.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting loguru (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (2.10.5)\n", + "Collecting pymupdf4llm (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (6.0.2)\n", + "Collecting streamlit (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", + "Collecting unsloth (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading unsloth-2025.1.6-py3-none-any.whl.metadata (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.7/53.7 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev56+gd125b79) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.12.2)\n", + "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (1.26.4)\n", + "Collecting diskcache>=5.6.1 (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.1.5)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (8.1.8)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.25.5)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (6.3.3)\n", + "Collecting unsloth_zoo>=2025.1.4 (from 
unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading unsloth_zoo-2025.1.5-py3-none-any.whl.metadata (16 kB)\n", + "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.5.1+cu121)\n", + "Collecting xformers>=0.0.27.post2 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n", + "Collecting bitsandbytes (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)\n", + "Requirement already satisfied: triton>=3.0.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.1.0)\n", + "Collecting tyro (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading tyro-0.9.12-py3-none-any.whl.metadata (9.4 kB)\n", + "Requirement already satisfied: transformers!=4.47.0,>=4.46.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.47.1)\n", + "Collecting datasets>=2.16.0 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: sentencepiece>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.0)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (5.9.5)\n", + "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.45.1)\n", + "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.2.1)\n", + "Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from 
unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading trl-0.13.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.14.0)\n", + "Collecting protobuf<6,>=3.20 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)\n", + "Collecting hf_transfer (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from accelerate>=0.34.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.5.2)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.21.1)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", + "Collecting xxhash (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", + "Collecting multiprocess<0.70.17 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", + "Collecting fsspec>=2023.5.0 (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from 
datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.11.11)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.0.12)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2>=2.11.3->llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.0.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from 
rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.18.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.4.2)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.0.106)\n", + "Requirement already satisfied: 
nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.13.1)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.6.85)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2024.11.6)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.21.0)\n", + "Collecting cut_cross_entropy (from unsloth_zoo>=2025.1.4->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)\n", + "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.16)\n", + "Collecting shtab>=1.5.6 (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n", + "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.4.1)\n", + "Requirement already 
satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.18.3)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.0.2)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages 
(from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.17.0)\n", + "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m288.3 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", + "Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m84.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading unsloth-2025.1.6-py3-none-any.whl (175 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m175.5/175.5 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hDownloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.1/162.1 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m89.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m79.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading trl-0.13.0-py3-none-any.whl (293 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.4/293.4 kB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading unsloth_zoo-2025.1.5-py3-none-any.whl (80 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.2/80.2 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl (15.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.3/15.3 MB\u001b[0m \u001b[31m86.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.1/69.1 MB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m 
eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m83.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tyro-0.9.12-py3-none-any.whl (115 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.7/115.7 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\n", + "Downloading cut_cross_entropy-25.1.1-py3-none-any.whl (22 kB)\n", + "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire, llama-cpp-python\n", + " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev56+gd125b79-py3-none-any.whl size=16254 sha256=44ec8b803647a36e38d429c99dca9a41465cabf732b89ee30d835b5bf7ef397a\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-7rr3qwwf/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", + " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=ae95bde960b4f7822e181432b7e15ac481d7e46da0b22e2e1070da6763218641\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + " Building wheel for llama-cpp-python (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.6-cp311-cp311-linux_x86_64.whl size=4022116 sha256=7fdfa0bf98f5cf71204e7497d168a59fa164e54a9c7adecfd4bef61dd1277b44\n", + " Stored in directory: /root/.cache/pip/wheels/e8/96/d2/acfb576f7a58ef0580e2fec8096e5eefd17cc356017089337b\n", + "Successfully built structured-qa fire llama-cpp-python\n", + "Installing collected packages: xxhash, watchdog, shtab, pymupdf, protobuf, loguru, hf_transfer, fsspec, fire, diskcache, dill, pymupdf4llm, pydeck, multiprocess, llama-cpp-python, tyro, xformers, datasets, cut_cross_entropy, bitsandbytes, trl, streamlit, unsloth_zoo, unsloth, structured-qa\n", + " Attempting uninstall: protobuf\n", + " Found existing installation: protobuf 4.25.5\n", + " Uninstalling protobuf-4.25.5:\n", + " Successfully uninstalled protobuf-4.25.5\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2024.10.0\n", + " Uninstalling fsspec-2024.10.0:\n", + " Successfully uninstalled fsspec-2024.10.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "grpcio-status 1.62.3 requires protobuf>=4.21.6, but you have protobuf 3.20.3 which is incompatible.\n", + "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\n", + "tensorflow-metadata 1.16.1 requires protobuf<6.0.0dev,>=4.25.2; python_version >= \"3.11\", but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed bitsandbytes-0.45.0 cut_cross_entropy-25.1.1 datasets-3.2.0 dill-0.3.8 diskcache-5.6.3 fire-0.7.0 fsspec-2024.9.0 hf_transfer-0.1.9 llama-cpp-python-0.3.6 loguru-0.7.3 multiprocess-0.70.16 protobuf-3.20.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 shtab-1.7.1 streamlit-1.41.1 structured-qa-0.3.3.dev56+gd125b79 trl-0.13.0 tyro-0.9.12 unsloth-2025.1.6 unsloth_zoo-2025.1.5 watchdog-6.0.0 xformers-0.0.29.post1 xxhash-3.5.0\n" + ] }, { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "AZBwRnfjlyhZ", - "outputId": "1cea7c23-9edf-45dc-d58d-09ca5b41d193" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-01-23 10:03:52.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:52.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:52.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:56.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:56.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:03.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3 Model Architecture\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:03.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:10.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:10.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:14.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:14.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:21.194\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.2.2 Multi-Head Attention\",\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:21.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:28.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.4 Embeddings and Softmax\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:28.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:35.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"6.2 Model Variations\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:35.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:42.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.2 Hardware and Schedule\",\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:42.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:47.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.2 Hardware and Schedule\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:47.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:51.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.3 Optimizer\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:51.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:05:51.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-01-23 10:05:55.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.3 Optimizer\",\n", - " \"answer\": 4000\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:05:55.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:00.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. 
Training\",\n", - " \"answer\": \"0.1\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:00.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685v2.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:01.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:04.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:04.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:14.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. OUR METHOD\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:14.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How much memory is saved (in GB) when training GPT-3 175B with LoRA compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:22.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. 
Practical Benefits and Limitations.\",\n", - " \"answer\": 0.85\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:22.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:30.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"ABSTRACT\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:30.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:42.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"1. 
INTRODUCTION\",\n", - "\"answer\": \"175\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:42.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:52.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4.1 Low-Rank-Parametrized Update Matrices\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:52.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:53.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:56.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:56.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:04.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3 Arithmetic Reasoning\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:04.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many large language models 
were evaluated?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:13.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"3.1 Experimental Setup\",\n", - "\"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:13.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:21.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"3.1 Experimental Setup\",\n", - " \"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:21.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:31.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"5 Symbolic Reasoning\",\n", - "\"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:31.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:41.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"3.4 Robustness of Chain of Thought\",\n", - "\"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:41.105\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many random samples for examined to understand model errors?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:50.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.2 Results\",\n", - " \"answer\": \"50\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:50.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:01.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5 Symbolic Reasoning\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:01.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:01.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:05.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:05.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:09.436\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. Experimental Results\",\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:09.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the model use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:13.038\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2. Decision Tree Analysis of Neural Networks\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:13.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parameters are in the y = x^2 toy model tree?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:16.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Table 1. Computation and memory analysis of toy problems.\",\n", - " \"answer\": \"39\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:16.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:20.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2.4. 
Recurrent Networks\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:20.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:23.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. Experimental Results\",\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:23.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:27.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. 
Conclusion\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:27.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:28.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:32.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:32.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:44.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"1. Introduction\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:44.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:58.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:58.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is the term of office for members of the advisory board of the Authors Alliance? -A: The source does not specify a term of office for the advisory board. -B: 2 years -C: 4 years\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:09.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"5. Acknowledgements\",\n", - "\"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:09.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:23.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"1. Introduction\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:23.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:34.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:34.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In what year did the Bull and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:49.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. Open Access Policies\",\n", - " \"answer\": 2015\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:49.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:01.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Chapter 5\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:01.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:02.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:06.198\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:20.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2. SANITARY HOT WATER INSTALLATIONS\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:20.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:37.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"1.2.8. GAS DETECTION AND VENTING\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:37.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the daylight factor required for façades with exterior obstructions?\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:53.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. VISUAL COMFORT\",\n", - " \"answer\": \"0.7%\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:53.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:07.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"1.1.3. OCCUPATIONAL SAFETY\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:07.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:11.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:15.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:15.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of AI systems are banned by the AI Act? -A: High-risk systems, -B: Manipulative systems, -C: Real-time biometric systems in public spaces\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:43.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 5\",\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:43.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? 
-A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:06.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 10\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:06.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:26.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 51\",\n", - "\"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:26.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:44.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2. 
(29)\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:44.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:02.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 73\",\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:02.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:20.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 60\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:20.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. 
-B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:37.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 99\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:37.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:08.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 56\",\n", - "\"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:08.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:30.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 79\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:30.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:15:30.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", - "\u001b[32m2025-01-23 10:15:51.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 18\",\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:15:51.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:09.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 65\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:09.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:15.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:18.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:18.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:28.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"OVERVIEW AND GOAL\",\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:28.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:38.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"END OF THE GAME\",\n", - " \"answer\": 3\n", - 
"}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:38.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:45.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. CARD AND TILE EFFECTS\",\n", - " \"answer\": \"6\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:45.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:54.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Turn overview\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:54.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:03.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"CHAPTER OVERVIEW\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:03.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:13.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - 
\u001b[1m{\n", - " \"section\": \"A. Take a Chapter card\",\n", - " \"answer\": \"3\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:13.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:21.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. Chapter Overview\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:21.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:29.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"CARD AND TILE COSTS\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:29.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:39.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. 
CARD AND TILE COSTS\",\n", - " \"answer\": 2\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:39.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:39.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:48.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. CARD AND TILE EFFECTS\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:48.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:57.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"CARD AND TILE EFFECTS\",\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:57.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:04.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. 
CARD AND TILE EFFECTS\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:04.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:13.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"CONQUERING MIDDLE-EARTH\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:13.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:22.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"END OF THE GAME\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:22.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:30.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"END OF THE GAME\",\n", - " \"answer\": \"7\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:30.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:32.954\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:36.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:36.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:44.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"LOOKOUT PHASE\",\n", - " \"answer\": 4\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:44.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:51.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"LOOKOUT PHASE\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:51.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:58.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"ACTION PHASE\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:58.562\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:05.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"EXPEDITION PHASE\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:05.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:12.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME FLOW\",\n", - " \"answer\": 25\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:12.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:19.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME FLOW\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:19.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:27.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"9. 
ACTIONS\",\n", - " \"answer\": \"1\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:27.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:36.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"ACTIONS\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:36.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:43.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME END\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:43.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:21:43.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-23 10:21:50.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME END\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured_qa.csv\")\n", - "data[\"pred_answer\"] = 
[None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for document_link, document_data in data.groupby(\"document\"):\n", - " logger.info(f\"Downloading document {document_link}\")\n", - " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " if not Path(downloaded_document).exists():\n", - " urlretrieve(document_link, downloaded_document)\n", - " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", - " else:\n", - " logger.info(f\"File {downloaded_document} already exists\")\n", - "\n", - " answers, sections = process_document_questions(downloaded_document, document_data, model)\n", - "\n", - " for index in document_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] + "data": { + "application/vnd.colab-display-data+json": { + "id": "6b04f797f18f4c54948485ed45a8dacd", + "pip_warning": { + "packages": [ + "google" + ] + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "S22kTrfPlyhU", + "outputId": "f77e1fb9-837a-4674-85f4-bc4e54815ed0" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "a9eMkW1-lyha" - }, - "source": [ - "# Results" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "--2025-01-23 10:00:12-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... 
connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 14711 (14K) [text/plain]\n", + "Saving to: ‘structured_qa.csv’\n", + "\n", + "structured_qa.csv 100%[===================>] 14.37K --.-KB/s in 0.003s \n", + "\n", + "2025-01-23 10:00:19 (5.28 MB/s) - ‘structured_qa.csv’ saved [14711/14711]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "\n", + "GEMINI_API_KEY = None\n", + "if not GEMINI_API_KEY:\n", + " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", + "genai.configure(api_key=GEMINI_API_KEY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process all questions for a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + "import json\n", + "import time\n", + "\n", + "\n", + "def process_document_questions(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + "):\n", + " logger.info(\"Uploading file\")\n", + " file = genai.upload_file(document_file, mime_type=\"application/pdf\")\n", + " while file.state.name == \"PROCESSING\":\n", + " logger.debug(\"Waiting for file to be processed.\")\n", + " time.sleep(2)\n", + " file = genai.get_file(file.name)\n", + "\n", + " logger.info(\"Predicting\")\n", + " n = 0\n", + " answers = 
{}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " if n > 0 and n % 10 == 0:\n", + " logger.info(\"Waiting for 60 seconds\")\n", + " time.sleep(60)\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " response = model.model.generate_content([file, question])\n", + " logger.info(response.text)\n", + " response_json = json.loads(response.text)\n", + " answers[index] = response_json[\"answer\"]\n", + " sections[index] = response_json[\"section\"]\n", + " n += 1\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "whtSJwdrlyhZ" + }, + "outputs": [], + "source": [ + "SYSTEM_PROMPT = \"\"\"\n", + "You are given an input document and a question.\n", + "You can only answer the question based on the information in the document.\n", + "You will return a JSON name with two keys: \"section\" and \"answer\".\n", + "In `\"section\"`, you will return the name of the section where you found the answer.\n", + "In `\"answer\"`, you will return the answer one of the following JSON:\n", + "- Yes/No (for boolean questions)\n", + "Is the model an LLM?\n", + "{\n", + " \"section\": \"1. Introduction\",\n", + " \"answer\": \"No\"\n", + "}\n", + "- Single number (for numeric questions)\n", + "How many layers does the model have?\n", + "{\n", + " \"section\": \"2. Architecture\",\n", + " \"answer\": 12\n", + "}\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model?\n", + "-A: ReLU\n", + "-B: Sigmoid\n", + "-C: Tanh\n", + "{\n", + " \"section\": \"2. 
Architecture\",\n", + " \"answer\": \"C\"\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "ObsvwlNslyhZ" + }, + "outputs": [], + "source": [ + "model = load_gemini_model(\n", + " \"gemini-2.0-flash-exp\",\n", + " system_prompt=SYSTEM_PROMPT,\n", + " generation_config={\n", + " \"response_mime_type\": \"application/json\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W97jWzzOlyhZ" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, + "id": "AZBwRnfjlyhZ", + "outputId": "1cea7c23-9edf-45dc-d58d-09ca5b41d193" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 238 - }, - "id": "EYYJgWf6lyha", - "outputId": "347014fe-d6e6-4d0c-e094-dc5fcbed295e" - }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25,\n \"min\": 5,\n \"max\": 74,\n \"num_unique_values\": 6,\n \"samples\": [\n 5,\n 13,\n 74\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"3 Model Architecture\",\n \"3 
Experimental Results\",\n \"Natural lighting\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Does the final model use learned positional embeddings?\",\n \"How many parameters are in the y = x^2 toy model tree?\",\n \"What is the daylight factor required for fa\\u00e7ades with exterior obstructions?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"14\",\n \"0.7\",\n \"850\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"39\",\n \"0.7%\",\n \"0.85\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"6.2 Model Variations\",\n \"Table 1. Computation and memory analysis of toy problems.\",\n \"4. VISUAL COMFORT\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" - }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
55https://arxiv.org/pdf/1706.037623 Model ArchitectureDoes the final model use learned positional em...NOYES6.2 Model Variations
1313https://arxiv.org/pdf/2210.051893 Experimental ResultsHow many parameters are in the y = x^2 toy mod...1439Table 1. Computation and memory analysis of to...
1818https://arxiv.org/pdf/2106.09685v2.pdf5.5 Scaling Up to GPT-3How much memory is saved (in GB) when training...8500.854. Practical Benefits and Limitations.
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Prohibited AI Practices (Article 5)Which type of AI systems are banned by the AI ...CBArticle 5
3939https://authorsalliance.org/wp-content/uploads...Chapter 5 Where do you want to make your work ...Are Gold Open Access and Green Open Access mut...NOYESChapter 5
7474https://commission.europa.eu/document/download...Natural lightingWhat is the daylight factor required for façad...0.70.7%4. VISUAL COMFORT
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " Unnamed: 0 document \\\n", - "5 5 https://arxiv.org/pdf/1706.03762 \n", - "13 13 https://arxiv.org/pdf/2210.05189 \n", - "18 18 https://arxiv.org/pdf/2106.09685v2.pdf \n", - "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", - "39 39 https://authorsalliance.org/wp-content/uploads... \n", - "74 74 https://commission.europa.eu/document/download... \n", - "\n", - " section \\\n", - "5 3 Model Architecture \n", - "13 3 Experimental Results \n", - "18 5.5 Scaling Up to GPT-3 \n", - "22 Prohibited AI Practices (Article 5) \n", - "39 Chapter 5 Where do you want to make your work ... \n", - "74 Natural lighting \n", - "\n", - " question answer pred_answer \\\n", - "5 Does the final model use learned positional em... NO YES \n", - "13 How many parameters are in the y = x^2 toy mod... 14 39 \n", - "18 How much memory is saved (in GB) when training... 850 0.85 \n", - "22 Which type of AI systems are banned by the AI ... C B \n", - "39 Are Gold Open Access and Green Open Access mut... NO YES \n", - "74 What is the daylight factor required for façad... 0.7 0.7% \n", - "\n", - " pred_section \n", - "5 6.2 Model Variations \n", - "13 Table 1. Computation and memory analysis of to... \n", - "18 4. Practical Benefits and Limitations. \n", - "22 Article 5 \n", - "39 Chapter 5 \n", - "74 4. 
VISUAL COMFORT " - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-01-23 10:03:52.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:52.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:52.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:56.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:56.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:03.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3 Model Architecture\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:03.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:10.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:10.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:14.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:14.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:21.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.2.2 Multi-Head Attention\",\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:21.202\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:28.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.4 Embeddings and Softmax\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:28.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:35.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"6.2 Model Variations\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:35.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:42.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.2 Hardware and Schedule\",\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:42.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:47.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.2 Hardware and Schedule\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:47.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:51.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.3 Optimizer\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:51.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:05:51.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-01-23 10:05:55.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.3 Optimizer\",\n", + " \"answer\": 4000\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:05:55.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:00.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
Training\",\n", + " \"answer\": \"0.1\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:00.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685v2.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:01.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:04.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:04.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:14.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. OUR METHOD\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:14.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How much memory is saved (in GB) when training GPT-3 175B with LoRA compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:22.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. 
Practical Benefits and Limitations.\",\n", + " \"answer\": 0.85\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:22.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:30.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"ABSTRACT\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:30.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:42.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"1. 
INTRODUCTION\",\n", + "\"answer\": \"175\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:42.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:52.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4.1 Low-Rank-Parametrized Update Matrices\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:52.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:53.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:56.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:56.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:04.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3 Arithmetic Reasoning\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:04.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many large language models 
were evaluated?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:13.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"3.1 Experimental Setup\",\n", + "\"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:13.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:21.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"3.1 Experimental Setup\",\n", + " \"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:21.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:31.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"5 Symbolic Reasoning\",\n", + "\"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:31.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:41.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"3.4 Robustness of Chain of Thought\",\n", + "\"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:41.105\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many random samples for examined to understand model errors?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:50.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.2 Results\",\n", + " \"answer\": \"50\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:50.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:01.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5 Symbolic Reasoning\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:01.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:01.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:05.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:05.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:09.436\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Experimental Results\",\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:09.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the model use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:13.038\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2. Decision Tree Analysis of Neural Networks\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:13.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parameters are in the y = x^2 toy model tree?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:16.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Table 1. Computation and memory analysis of toy problems.\",\n", + " \"answer\": \"39\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:16.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:20.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2.4. 
Recurrent Networks\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:20.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:23.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Experimental Results\",\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:23.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:27.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. 
Conclusion\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:27.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:28.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:32.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:32.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:44.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1. Introduction\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:44.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:58.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:58.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is the term of office for members of the advisory board of the Authors Alliance? -A: The source does not specify a term of office for the advisory board. -B: 2 years -C: 4 years\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:09.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"5. Acknowledgements\",\n", + "\"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:09.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:23.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1. Introduction\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:23.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:34.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:34.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In what year did the Bull and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:49.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Open Access Policies\",\n", + " \"answer\": 2015\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:49.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:01.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Chapter 5\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:01.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:02.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:06.198\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:20.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2. SANITARY HOT WATER INSTALLATIONS\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:20.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:37.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1.2.8. GAS DETECTION AND VENTING\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:37.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the daylight factor required for façades with exterior obstructions?\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:53.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. VISUAL COMFORT\",\n", + " \"answer\": \"0.7%\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:53.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:07.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1.1.3. OCCUPATIONAL SAFETY\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:07.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:11.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:15.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:15.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of AI systems are banned by the AI Act? -A: High-risk systems, -B: Manipulative systems, -C: Real-time biometric systems in public spaces\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:43.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 5\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:43.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? 
-A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:06.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 10\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:06.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:26.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 51\",\n", + "\"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:26.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:44.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2. 
(29)\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:44.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:02.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 73\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:02.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:20.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 60\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:20.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. 
-B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:37.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 99\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:37.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:08.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 56\",\n", + "\"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:08.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:30.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 79\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:30.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:15:30.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-23 10:15:51.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 18\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:15:51.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:09.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 65\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:09.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:15.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:18.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:18.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:28.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"OVERVIEW AND GOAL\",\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:28.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:38.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"END OF THE GAME\",\n", + " \"answer\": 3\n", + 
"}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:38.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:45.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. CARD AND TILE EFFECTS\",\n", + " \"answer\": \"6\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:45.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:54.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Turn overview\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:54.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:03.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"CHAPTER OVERVIEW\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:03.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:13.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - 
\u001b[1m{\n", + " \"section\": \"A. Take a Chapter card\",\n", + " \"answer\": \"3\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:13.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:21.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Chapter Overview\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:21.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:29.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"CARD AND TILE COSTS\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:29.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:39.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. 
CARD AND TILE COSTS\",\n", + " \"answer\": 2\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:39.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:39.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:48.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. CARD AND TILE EFFECTS\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:48.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:57.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"CARD AND TILE EFFECTS\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:57.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:04.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
CARD AND TILE EFFECTS\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:04.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:13.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"CONQUERING MIDDLE-EARTH\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:13.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:22.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"END OF THE GAME\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:22.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:30.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"END OF THE GAME\",\n", + " \"answer\": \"7\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:30.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:32.954\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:36.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:36.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:44.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"LOOKOUT PHASE\",\n", + " \"answer\": 4\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:44.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:51.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"LOOKOUT PHASE\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:51.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:58.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"ACTION PHASE\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:58.562\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:05.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"EXPEDITION PHASE\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:05.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:12.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME FLOW\",\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:12.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:19.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME FLOW\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:19.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:27.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"9. 
ACTIONS\",\n", + " \"answer\": \"1\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:27.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:36.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"ACTIONS\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:36.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:43.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME END\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:43.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:21:43.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-23 10:21:50.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME END\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = 
[None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document_questions(\n", + " downloaded_document, document_data, model\n", + " )\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 }, + "id": "EYYJgWf6lyha", + "outputId": "347014fe-d6e6-4d0c-e094-dc5fcbed295e" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "wfz1XQDLlyha", - "outputId": "36597dcf-ea15-414d-d66f-a4cb9102c4da" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25,\n \"min\": 5,\n \"max\": 74,\n \"num_unique_values\": 6,\n \"samples\": [\n 5,\n 13,\n 74\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n 
\"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"3 Model Architecture\",\n \"3 Experimental Results\",\n \"Natural lighting\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Does the final model use learned positional embeddings?\",\n \"How many parameters are in the y = x^2 toy model tree?\",\n \"What is the daylight factor required for fa\\u00e7ades with exterior obstructions?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"14\",\n \"0.7\",\n \"850\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"39\",\n \"0.7%\",\n \"0.85\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"6.2 Model Variations\",\n \"Table 1. Computation and memory analysis of toy problems.\",\n \"4. VISUAL COMFORT\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9210526315789473" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
55https://arxiv.org/pdf/1706.037623 Model ArchitectureDoes the final model use learned positional em...NOYES6.2 Model Variations
1313https://arxiv.org/pdf/2210.051893 Experimental ResultsHow many parameters are in the y = x^2 toy mod...1439Table 1. Computation and memory analysis of to...
1818https://arxiv.org/pdf/2106.09685v2.pdf5.5 Scaling Up to GPT-3How much memory is saved (in GB) when training...8500.854. Practical Benefits and Limitations.
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Prohibited AI Practices (Article 5)Which type of AI systems are banned by the AI ...CBArticle 5
3939https://authorsalliance.org/wp-content/uploads...Chapter 5 Where do you want to make your work ...Are Gold Open Access and Green Open Access mut...NOYESChapter 5
7474https://commission.europa.eu/document/download...Natural lightingWhat is the daylight factor required for façad...0.70.7%4. VISUAL COMFORT
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" + "text/plain": [ + " Unnamed: 0 document \\\n", + "5 5 https://arxiv.org/pdf/1706.03762 \n", + "13 13 https://arxiv.org/pdf/2210.05189 \n", + "18 18 https://arxiv.org/pdf/2106.09685v2.pdf \n", + "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "39 39 https://authorsalliance.org/wp-content/uploads... \n", + "74 74 https://commission.europa.eu/document/download... \n", + "\n", + " section \\\n", + "5 3 Model Architecture \n", + "13 3 Experimental Results \n", + "18 5.5 Scaling Up to GPT-3 \n", + "22 Prohibited AI Practices (Article 5) \n", + "39 Chapter 5 Where do you want to make your work ... \n", + "74 Natural lighting \n", + "\n", + " question answer pred_answer \\\n", + "5 Does the final model use learned positional em... NO YES \n", + "13 How many parameters are in the y = x^2 toy mod... 14 39 \n", + "18 How much memory is saved (in GB) when training... 850 0.85 \n", + "22 Which type of AI systems are banned by the AI ... C B \n", + "39 Are Gold Open Access and Green Open Access mut... NO YES \n", + "74 What is the daylight factor required for façad... 0.7 0.7% \n", + "\n", + " pred_section \n", + "5 6.2 Model Variations \n", + "13 Table 1. Computation and memory analysis of to... \n", + "18 4. Practical Benefits and Limitations. \n", + "22 Article 5 \n", + "39 Chapter 5 \n", + "74 4. 
VISUAL COMFORT " ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rjMNQp8-sZn9" - }, - "outputs": [], - "source": [] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" } - ], - "metadata": { + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python", - "version": "3.10.12" + "id": "wfz1XQDLlyha", + "outputId": "36597dcf-ea15-414d-d66f-a4cb9102c4da" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9210526315789473" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rjMNQp8-sZn9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } From bfdacea8db390d9b3ab240676ea3b9e51276cd87 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 27 Jan 2025 16:23:13 +0100 Subject: [PATCH 064/120] Extend structured_qa. Add perfect_context. --- benchmark/perfect_context/1 INTRODUCTION.txt | 49 ++++++ .../1.2.1. Internal partitions and doors.txt | 16 ++ .../15.3. API Fundamentals.txt | 22 +++ benchmark/perfect_context/2.1. Toilets.txt | 8 + .../2.4 Recurrent Networks.txt | 59 +++++++ .../23.1. 
What is Lazy Loading?.txt | 21 +++ .../3 Arithmetic Reasoning.txt | 6 + .../3 Experimental Results.txt | 91 +++++++++++ .../perfect_context/3 Model Architecture.txt | 9 ++ .../3.1 Encoder and Decoder Stacks.txt | 15 ++ .../3.1 Experimental Setup.txt | 32 ++++ benchmark/perfect_context/3.2 Results.txt | 50 ++++++ .../3.2.2 Multi-Head Attention.txt | 21 +++ .../3.4 Embeddings and Softmax.txt | 5 + .../3.4 Robustness of Chain of Thought.txt | 33 ++++ .../3.5 Positional Encoding.txt | 18 +++ benchmark/perfect_context/4 OUR METHOD.txt | 3 + .../perfect_context/4.1. Natural lighting.txt | 7 + .../perfect_context/5 Symbolic Reasoning.txt | 43 +++++ .../5.2 Hardware and Schedule.txt | 5 + .../perfect_context/5.2. Thread Hierarchy.txt | 75 +++++++++ benchmark/perfect_context/5.3 Optimizer.txt | 7 + .../perfect_context/5.4 Regularization.txt | 7 + .../5.5 SCALING UP TO GPT-3 175B.txt | 11 ++ .../6.1.1. Compilation Workflow.txt | 30 ++++ benchmark/perfect_context/ACTION PHASE.txt | 28 ++++ .../CARBON MONOXIDE DETECTION AND VENTING.txt | 13 ++ .../perfect_context/CARD AND TILE COSTS.txt | 39 +++++ .../perfect_context/CARD AND TILE EFFECTS.txt | 31 ++++ ... 
YOU WANT TO MAKE YOUR WORK AVAILABLE?.txt | 35 ++++ .../perfect_context/CHAPTER OVERVIEW.txt | 43 +++++ benchmark/perfect_context/CLEANUP PHASE.txt | 14 ++ .../CONQUERING MIDDLE-EARTH.txt | 14 ++ ...l-purpose AI models with systemic risk.txt | 13 ++ .../perfect_context/Codes of practice.txt | 39 +++++ ...pliant AI systems which present a risk.txt | 20 +++ .../Data and data governance.txt | 49 ++++++ benchmark/perfect_context/END OF THE GAME.txt | 15 ++ .../EU declaration of conformity.txt | 20 +++ .../perfect_context/EXPEDITION PHASE.txt | 24 +++ ...European Artificial Intelligence Board.txt | 29 ++++ benchmark/perfect_context/Europe.txt | 34 ++++ benchmark/perfect_context/GAME END.txt | 20 +++ ...O YOU CHOOSE AN OPEN ACCESS PUBLISHER?.txt | 92 +++++++++++ ...LLM Tokenization Introduces Unfairness.txt | 34 ++++ .../perfect_context/LOCATION ABILITIES.txt | 56 +++++++ benchmark/perfect_context/LOOKOUT PHASE.txt | 36 +++++ benchmark/perfect_context/LORA ABSTRACT.txt | 18 +++ ...RCOMING RESERVATIONS ABOUT OPEN ACCESS.txt | 152 ++++++++++++++++++ .../perfect_context/OVERVIEW AND GOAL.txt | 7 + benchmark/perfect_context/Penalties.txt | 0 .../Prohibited AI Practices.txt | 105 ++++++++++++ benchmark/perfect_context/RAID.txt | 12 ++ .../Reporting of serious incidents.txt | 40 +++++ benchmark/perfect_context/Risk Perception.txt | 29 ++++ .../SECTION I. INTRODUCTION.txt | 65 ++++++++ ...itions outside AI regulatory sandboxes.txt | 0 benchmark/perfect_context/Training Cost.txt | 44 +++++ ...rs and deployers of certain AI systems.txt | 44 +++++ benchmark/perfect_context/U.S. Regulation.txt | 71 ++++++++ .../WHY ARE OPEN ACCESS POLICIES ADOPTED?.txt | 33 ++++ benchmark/structured_qa.csv | 114 +++++++------ src/structured_qa/workflow.py | 2 +- 63 files changed, 2031 insertions(+), 46 deletions(-) create mode 100644 benchmark/perfect_context/1 INTRODUCTION.txt create mode 100644 benchmark/perfect_context/1.2.1. 
Internal partitions and doors.txt create mode 100644 benchmark/perfect_context/15.3. API Fundamentals.txt create mode 100644 benchmark/perfect_context/2.1. Toilets.txt create mode 100644 benchmark/perfect_context/2.4 Recurrent Networks.txt create mode 100644 benchmark/perfect_context/23.1. What is Lazy Loading?.txt create mode 100644 benchmark/perfect_context/3 Arithmetic Reasoning.txt create mode 100644 benchmark/perfect_context/3 Experimental Results.txt create mode 100644 benchmark/perfect_context/3 Model Architecture.txt create mode 100644 benchmark/perfect_context/3.1 Encoder and Decoder Stacks.txt create mode 100644 benchmark/perfect_context/3.1 Experimental Setup.txt create mode 100644 benchmark/perfect_context/3.2 Results.txt create mode 100644 benchmark/perfect_context/3.2.2 Multi-Head Attention.txt create mode 100644 benchmark/perfect_context/3.4 Embeddings and Softmax.txt create mode 100644 benchmark/perfect_context/3.4 Robustness of Chain of Thought.txt create mode 100644 benchmark/perfect_context/3.5 Positional Encoding.txt create mode 100644 benchmark/perfect_context/4 OUR METHOD.txt create mode 100644 benchmark/perfect_context/4.1. Natural lighting.txt create mode 100644 benchmark/perfect_context/5 Symbolic Reasoning.txt create mode 100644 benchmark/perfect_context/5.2 Hardware and Schedule.txt create mode 100644 benchmark/perfect_context/5.2. Thread Hierarchy.txt create mode 100644 benchmark/perfect_context/5.3 Optimizer.txt create mode 100644 benchmark/perfect_context/5.4 Regularization.txt create mode 100644 benchmark/perfect_context/5.5 SCALING UP TO GPT-3 175B.txt create mode 100644 benchmark/perfect_context/6.1.1. 
Compilation Workflow.txt create mode 100644 benchmark/perfect_context/ACTION PHASE.txt create mode 100644 benchmark/perfect_context/CARBON MONOXIDE DETECTION AND VENTING.txt create mode 100644 benchmark/perfect_context/CARD AND TILE COSTS.txt create mode 100644 benchmark/perfect_context/CARD AND TILE EFFECTS.txt create mode 100644 benchmark/perfect_context/CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK AVAILABLE?.txt create mode 100644 benchmark/perfect_context/CHAPTER OVERVIEW.txt create mode 100644 benchmark/perfect_context/CLEANUP PHASE.txt create mode 100644 benchmark/perfect_context/CONQUERING MIDDLE-EARTH.txt create mode 100644 benchmark/perfect_context/Classification of general-purpose AI models as general-purpose AI models with systemic risk.txt create mode 100644 benchmark/perfect_context/Codes of practice.txt create mode 100644 benchmark/perfect_context/Compliant AI systems which present a risk.txt create mode 100644 benchmark/perfect_context/Data and data governance.txt create mode 100644 benchmark/perfect_context/END OF THE GAME.txt create mode 100644 benchmark/perfect_context/EU declaration of conformity.txt create mode 100644 benchmark/perfect_context/EXPEDITION PHASE.txt create mode 100644 benchmark/perfect_context/Establishment and structure of the European Artificial Intelligence Board.txt create mode 100644 benchmark/perfect_context/Europe.txt create mode 100644 benchmark/perfect_context/GAME END.txt create mode 100644 benchmark/perfect_context/HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER?.txt create mode 100644 benchmark/perfect_context/LLM Tokenization Introduces Unfairness.txt create mode 100644 benchmark/perfect_context/LOCATION ABILITIES.txt create mode 100644 benchmark/perfect_context/LOOKOUT PHASE.txt create mode 100644 benchmark/perfect_context/LORA ABSTRACT.txt create mode 100644 benchmark/perfect_context/OVERCOMING RESERVATIONS ABOUT OPEN ACCESS.txt create mode 100644 benchmark/perfect_context/OVERVIEW AND GOAL.txt create mode 100644 
benchmark/perfect_context/Penalties.txt create mode 100644 benchmark/perfect_context/Prohibited AI Practices.txt create mode 100644 benchmark/perfect_context/RAID.txt create mode 100644 benchmark/perfect_context/Reporting of serious incidents.txt create mode 100644 benchmark/perfect_context/Risk Perception.txt create mode 100644 benchmark/perfect_context/SECTION I. INTRODUCTION.txt create mode 100644 benchmark/perfect_context/Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes.txt create mode 100644 benchmark/perfect_context/Training Cost.txt create mode 100644 benchmark/perfect_context/Transparency obligations for providers and deployers of certain AI systems.txt create mode 100644 benchmark/perfect_context/U.S. Regulation.txt create mode 100644 benchmark/perfect_context/WHY ARE OPEN ACCESS POLICIES ADOPTED?.txt diff --git a/benchmark/perfect_context/1 INTRODUCTION.txt b/benchmark/perfect_context/1 INTRODUCTION.txt new file mode 100644 index 0000000..98322b8 --- /dev/null +++ b/benchmark/perfect_context/1 INTRODUCTION.txt @@ -0,0 +1,49 @@ +Many applications in natural language processing rely on adapt- +ing one large-scale, pre-trained language model to multiple down- +stream applications. Such adaptation is usually done via fine-tuning, +which updates all the parameters of the pre-trained model. The ma- +jor downside of fine-tuning is that the new model contains as many +parameters as in the original model. As larger models are trained +every few months, this changes from a mere “inconvenience” for +GPT-2 (Radford et al., b) or RoBERTa large (Liu et al., 2019) to a +critical deployment challenge for GPT-3 (Brown et al., 2020) with +175 billion trainable parameters.1 +Many sought to mitigate this by adapting only some parameters or +learning external modules for new tasks. 
This way, we only need +to store and load a small number of task-specific parameters in ad- +dition to the pre-trained model for each task, greatly boosting the +operational efficiency when deployed. However, existing techniques +often introduce inference latency (Houlsby et al., 2019; Rebuffi et al., 2017) by extending model +depth or reduce the model’s usable sequence length (Li & Liang, 2021; Lester et al., 2021; Ham- +bardzumyan et al., 2020; Liu et al., 2021) (Section 3). More importantly, these methods often fail to +match the fine-tuning baselines, posing a trade-off between efficiency and model quality. +We take inspiration from Li et al. (2018a); Aghajanyan et al. (2020) which show that the learned +over-parametrized models in fact reside on a low intrinsic dimension. We hypothesize that the +change in weights during model adaptation also has a low “intrinsic rank”, leading to our proposed +Low-Rank Adaptation (LoRA) approach. LoRA allows us to train some dense layers in a neural +network indirectly by optimizing rank decomposition matrices of the dense layers’ change during +adaptation instead, while keeping the pre-trained weights frozen, as shown in Figure 1. Using GPT-3 +175B as an example, we show that a very low rank (i.e., r in Figure 1 can be one or two) suffices even +when the full rank (i.e., d) is as high as 12,288, making LoRA both storage- and compute-efficient. +LoRA possesses several key advantages. +• A pre-trained model can be shared and used to build many small LoRA modules for dif- +ferent tasks. We can freeze the shared model and efficiently switch tasks by replacing the +matrices A and B in Figure 1, reducing the storage requirement and task-switching over- +head significantly. +• LoRA makes training more efficient and lowers the hardware barrier to entry by up to 3 +times when using adaptive optimizers since we do not need to calculate the gradients or +maintain the optimizer states for most parameters. 
Instead, we only optimize the injected, +much smaller low-rank matrices. +• Our simple linear design allows us to merge the trainable matrices with the frozen weights +when deployed, introducing no inference latency compared to a fully fine-tuned model, by +construction. +• LoRA is orthogonal to many prior methods and can be combined with many of them, such +as prefix-tuning. We provide an example in Appendix E. +Terminologies and Conventions We make frequent references to the Transformer architecture +and use the conventional terminologies for its dimensions. We call the input and output di- +mension size of a Transformer layer dmodel. We use Wq , Wk, Wv , and Wo to refer to the +query/key/value/output projection matrices in the self-attention module. W or W0 refers to a pre- +trained weight matrix and ∆W its accumulated gradient update during adaptation. We use r to +denote the rank of a LoRA module. We follow the conventions set out by (Vaswani et al., 2017; +Brown et al., 2020) and use Adam (Loshchilov & Hutter, 2019; Kingma & Ba, 2017) for model +optimization and use a Transformer MLP feedforward dimension df f n = 4 × dmodel. diff --git a/benchmark/perfect_context/1.2.1. Internal partitions and doors.txt b/benchmark/perfect_context/1.2.1. Internal partitions and doors.txt new file mode 100644 index 0000000..ebc0ecd --- /dev/null +++ b/benchmark/perfect_context/1.2.1. Internal partitions and doors.txt @@ -0,0 +1,16 @@ +Fire resistance of partitions: +- Copy rooms must have vertical partitions with a fire resistance of EI 30 and doors must have +a fire resistance of EI1 30 and close automatically (linked to the fire detection system). +Door retaining devices: +- Certain fire doors for rooms which are accessed or traversed very frequently are kept open +using magnetic retainers linked to the fire detection system (e.g. entrance halls and lift +lobbies, corridor compartment doors, kitchenette doors and doors of copy rooms). 
+- As a minimum, rooms accommodating kitchenettes must have doors which close +automatically (linked to the fire detection system). +Door closers: +- In addition to the requirements set out in the applicable legislation, access doors to +toilets/washrooms, kitchenettes, copy rooms, etc. must also be fitted with door closers. +Horizontal communication between two buildings: +- In the case of doors forming an airlock between two buildings, an intermittent red light +signal should be placed above or beside the door frames. This signal should light up on the +non-dangerous side to indicate the danger when the alarm is raised. diff --git a/benchmark/perfect_context/15.3. API Fundamentals.txt b/benchmark/perfect_context/15.3. API Fundamentals.txt new file mode 100644 index 0000000..e6fed3a --- /dev/null +++ b/benchmark/perfect_context/15.3. API Fundamentals.txt @@ -0,0 +1,22 @@ +Graph memory nodes are graph nodes representing either memory allocation or free actions. As a +shorthand, nodes that allocate memory are called allocation nodes. Likewise, nodes that free memory +are called free nodes. Allocations created by allocation nodes are called graph allocations. CUDA as- +signs virtual addresses for the graph allocation at node creation time. While these virtual addresses +are fixed for the lifetime of the allocation node, the allocation contents are not persistent past the +freeing operation and may be overwritten by accesses referring to a different allocation. +Graph allocations are considered recreated every time a graph runs. 
A graph allocation’s lifetime, which +differs from the node’s lifetime, begins when GPU execution reaches the allocating graph node and +ends when one of the following occurs: +▶ GPU execution reaches the freeing graph node +▶ GPU execution reaches the freeing cudaFreeAsync() stream call +▶ immediately upon the freeing call to cudaFree() +Note: Graph destruction does not automatically free any live graph-allocated memory, even though it +ends the lifetime of the allocation node. The allocation must subsequently be freed in another graph, +or using cudaFreeAsync()∕cudaFree(). +Just like other Graph Structure, graph memory nodes are ordered within a graph by dependency edges. +A program must guarantee that operations accessing graph memory: +▶ are ordered after the allocation node +▶ are ordered before the operation freeing the memory +Graph allocation lifetimes begin and usually end according to GPU execution (as opposed to API invo- +cation). GPU ordering is the order that work runs on the GPU as opposed to the order that the work +is enqueued or described. Thus, graph allocations are considered ‘GPU ordered.’ diff --git a/benchmark/perfect_context/2.1. Toilets.txt b/benchmark/perfect_context/2.1. Toilets.txt new file mode 100644 index 0000000..01822c4 --- /dev/null +++ b/benchmark/perfect_context/2.1. Toilets.txt @@ -0,0 +1,8 @@ +Toilets must be installed on each level containing office rooms and for each structural unit; +they must be distributed uniformly and located in a central area. +Sinks must be supplied exclusively with cold water. +Accessibility for persons with reduced mobility (PRM) +In the event that a new office building is constructed upon request by the Commission, one +toilet which is accessible for persons with reduced mobility must be installed on each level +containing office rooms or similar. 
+In other cases, the requirements of the applicable legislation must be observed diff --git a/benchmark/perfect_context/2.4 Recurrent Networks.txt b/benchmark/perfect_context/2.4 Recurrent Networks.txt new file mode 100644 index 0000000..3e36e3a --- /dev/null +++ b/benchmark/perfect_context/2.4 Recurrent Networks.txt @@ -0,0 +1,32 @@ +As recurrent neural networks (RNNs) can be unrolled to +feed-forward representation, RNNs can also be equivalently +represented as decision trees. We study the following recurrent +neural network. Note that we simply omit the bias terms as +they can be represented by concatenating a 1 value to input +vectors. +$$h^{(t)} = \sigma\left(W^T h^{(t-1)} + U^T x^{(t)}\right)$$ +$$o^{(t)} = V^T h^{(t)} \tag{12}$$ +Similar to previous analysis, one can rewrite $h^{(t)}$ as fol- +lows. +$$h^{(t)} = a^{(t)} \odot \left(W^T h^{(t-1)} + U^T x^{(t)}\right) \tag{13}$$ +Eq. 13 can be rewritten as follows. +$$h^{(t)} = a^{(t)} \odot \Bigl(\prod_{j=(t-1)}^{1} (W^T \odot a^{(j)})\Bigr) W^T h^{(0)} + a^{(t)} \odot \sum_{i=1}^{t} \Bigl(\prod_{j=(t-1)}^{i} (W^T \odot a^{(j)})\Bigr) U^T x^{(i)} \tag{14}$$ +Note that in Eq. 14, the product operator stands for ma- +trix multiplication, its steps are −1 and we consider the out- +put of product operator to be 1 when i = t. One can rewrite +Eq. 14 by introducing ${}_{c_j}\hat{W}_j$ as follows. +$$h^{(t)} = a^{(t)} \odot {}_{c_1}\hat{W}_1 W^T h^{(0)} + a^{(t)} \odot \sum_{i=1}^{t} {}_{c_i}\hat{W}_i U^T x^{(i)}$$ +$${}_{c_i}\hat{W}_i^T = \prod_{j=(t-1)}^{i} (W^T \odot a^{(j)}) \tag{15}$$ +Combining Eq. 15 and Eq. 12, one can write $o^{(t)}$ as +follows. +$$o^{(t)} = a^{(t)} \odot \hat{V}^T {}_{c_1}\hat{W}_1 W^T h^{(0)} + a^{(t)} \odot \hat{V}^T \sum_{i=1}^{t} {}_{c_i}\hat{W}_i U^T x^{(i)} \tag{16}$$ +Eq. 16 can be further simplified to the following. +$$o^{(t)} = {}_{c_1}\hat{Z}_1^T W^T h^{(0)} + \sum_{i=1}^{t} {}_{c_i}\hat{Z}_i U^T x^{(i)} \tag{17}$$ +In Eq. 17, ${}_{c_i}\hat{Z}_i^T = a^{(t)} \odot \hat{V}^T {}_{c_i}\hat{W}_i$. As one can observe from +Eq. 17, the RNN output only depends on the categoriza- +tion vector $c_i$, which enables the tree equivalence -similar +to previous analysis. +Note that for RNNs, a popular choice for $\sigma$ in Eq. 12 +is tanh. As mentioned in Section 2.3, in order to provide +finite trees, one might consider using a piece-wise linear +approximation of tanh. diff --git a/benchmark/perfect_context/23.1. 
What is Lazy Loading?.txt b/benchmark/perfect_context/23.1. What is Lazy Loading?.txt new file mode 100644 index 0000000..f038a0b --- /dev/null +++ b/benchmark/perfect_context/23.1. What is Lazy Loading?.txt @@ -0,0 +1,21 @@ +Lazy Loading delays loading of CUDA modules and kernels from program initialization closer to kernels +execution. If a program does not use every single kernel it has included, then some kernels will be +loaded unnecessarily. This is very common, especially if you include any libraries. Most of the time, +programs only use a small amount of kernels from libraries they include. +Thanks to Lazy Loading, programs are able to only load kernels they are actually going to use, saving +time on initialization. This reduces memory overhead, both on GPU memory and host memory. +Lazy Loading is enabled by setting the CUDA_MODULE_LOADING environment variable to LAZY. +Firstly, CUDA Runtime will no longer load all modules during program initialization, with the exception +of modules containing managed variables. Each module will be loaded on first usage of a variable or +a kernel from that module. This optimization is only relevant to CUDA Runtime users, CUDA Driver +users who use cuModuleLoad are unaffected. This optimization shipped in CUDA 11.8. The behavior +for CUDA Driver users who use cuLibraryLoad to load module data into memory can be changed by +setting the CUDA_MODULE_DATA_LOADING environment variable. +Secondly, loading a module (cuModuleLoad*() family of functions) will not be loading kernels immedi- +ately, instead it will delay loading of a kernel until cuModuleGetFunction() is called. There are certain +exceptions here, some kernels have to be loaded during cuModuleLoad*(), such as kernels of which +pointers are stored in global variables. This optimization is relevant to both CUDA Runtime and CUDA +Driver users. CUDA Runtime will only call cuModuleGetFunction() when a kernel is used/referenced +for the first time. 
This optimization shipped in CUDA 11.7. +Both of these optimizations are designed to be invisible to the user, assuming CUDA Programming +Model is followed. diff --git a/benchmark/perfect_context/3 Arithmetic Reasoning.txt b/benchmark/perfect_context/3 Arithmetic Reasoning.txt new file mode 100644 index 0000000..8c12727 --- /dev/null +++ b/benchmark/perfect_context/3 Arithmetic Reasoning.txt @@ -0,0 +1,6 @@ +We begin by considering math word problems of the form in Figure 1, which measure the arithmetic +reasoning ability of language models. Though simple for humans, arithmetic reasoning is a task where +language models often struggle (Hendrycks et al., 2021; Patel et al., 2021, inter alia). Strikingly, chain- +of-thought prompting when used with the 540B parameter language model performs comparably with +task-specific finetuned models on several tasks, even achieving new state of the art on the challenging +GSM8K benchmark (Cobbe et al., 2021). diff --git a/benchmark/perfect_context/3 Experimental Results.txt b/benchmark/perfect_context/3 Experimental Results.txt new file mode 100644 index 0000000..d4b19ae --- /dev/null +++ b/benchmark/perfect_context/3 Experimental Results.txt @@ -0,0 +1,91 @@ +First, we make a toy experiment where we fit a neural +network to: y = x^2 equation. The neural network has 3 +dense layers with 2 filters each, except for last layer which +has 1 filter. The network uses leaky-ReLU activations after +fully connected layers, except for the last layer which has +no post-activation. We have used negative slope of 0.3 for +leaky-ReLU which is the default value in Tensorflow [1]. +The network was trained with 5000 (x, y) pairs where x was +regularly sampled from [−2.5, 2.5] interval. Fig. 2 shows +the decision tree corresponding to the neural network. In the +tree, every black rectangle box indicates a rule, left child +from the box means the rule does not hold, and the right +child means the rule holds. 
For better visualization, the +rules are obtained via converting w^T x + β > 0 to direct +inequalities acting on x. This can be done for the partic- +ular regression y = x^2, since x is a scalar. In every leaf, +the network applies a linear function -indicated by a red +rectangle- based on the decisions so far. We have avoided +writing these functions explicitly due to limited space. At +first glance, the tree representation of a neural network in +this example seems large due to the +$2^{\sum_{i}^{n-2} m_i} = 2^4 = 16$ +categorizations. However, we notice that a lot of the rules +in the decision tree is redundant, and hence some paths in +the decision tree becomes invalid. An example to redundant +rule is checking x < 0.32 after x < −1.16 rule holds. This +directly creates the invalid left child for this node. Hence, +the tree can be cleaned via removing the left child in this +case, and merging the categorization rule to the stricter one : +x < −1.16 in the particular case. Via cleaning the decision +tree in Fig. 2, we obtain the simpler tree in Fig. 3a, which +only consists of 5 categories instead of 16. The 5 categories +are directly visible also from the model response in Fig. 3b. +The interpretation of the neural network is thus straightfor- +ward: for each region whose boundaries are determined via +the decision tree representation, the network approximates +the non-linear y = x^2 equation by a linear equation. One +can clearly interpret and moreover make deduction from the +decision tree, some of which are as follows. The neural +network is unable to grasp the symmetrical nature of the +regression problem which is evident from the fact that the +decision boundaries are asymmetrical. The region in below +−1.16 and above 1 is unbounded and thus neural decisions +lose accuracy as x goes beyond these boundaries. + +y = x^2 Half-Moon +Param. Comp. Mult./Add. Param. Comp. Mult./Add. +Tree 14 2.6 2 39 4.1 8.2 +NN 13 4 16 15 5 25 +Table 1. 
Computation and memory analysis of toy problems + +Next, we investigate another toy problem of classifying +half-moons and analyse the decision tree produced by a neu- +ral network. We train a fully connected neural network with +3 layers with leaky-ReLU activations, except for last layer +which has sigmoid activation. Each layer has 2 filters ex- +cept for the last layer which has 1. The cleaned decision +tree induced by the trained network is shown in Fig. 4. The +decision tree finds many categories whose boundaries are +determined by the rules in the tree, where each category +is assigned a single class. In order to better visualize the +categories, we illustrate them with different colors in Fig. +5. One can make several deductions from the decision tree +such as some regions are very well-defined, bounded and +the classifications they make are perfectly in line with the +training data, thus these regions are very reliable. There are +unbounded categories which help obtaining accurate classi- +fication boundaries, yet fail to provide a compact represen- +tation of the training data, these may correspond to inaccu- +rate extrapolations made by neural decisions. There are also +some categories that emerged although none of the training +data falls to them. +Besides the interpretability aspect, the decision tree rep- +resentation also provides some computational advantages. +In Table 1, we compare the number of parameters, float- +point comparisons and multiplication or addition operations +of the neural network and the tree induced by it. Note that +the comparisons, multiplications and additions in the tree +representation are given as expected values, since per each +category depth of the tree is different. As the induced tree +is an unfolding of the neural network, it covers all possi- +ble routes and keeps all possible effective filters in mem- +ory. 
Thus, as expected, the number of parameters in the tree +representation of a neural network is larger than that of the +network. In the induced tree, in every layer i, a maximum +of mi filters are applied directly on the input, whereas in the +neural network always mi filters are applied on the previous +feature, which is usually much larger than the input in the +feature dimension. Thus, computation-wise, the tree repre- +sentation is advantageous compared to the neural network +one. diff --git a/benchmark/perfect_context/3 Model Architecture.txt b/benchmark/perfect_context/3 Model Architecture.txt new file mode 100644 index 0000000..37a867b --- /dev/null +++ b/benchmark/perfect_context/3 Model Architecture.txt @@ -0,0 +1,9 @@ +Most competitive neural sequence transduction models have an encoder-decoder structure [ 5, 2 , 35]. +Here, the encoder maps an input sequence of symbol representations (x1, ..., xn) to a sequence +of continuous representations z = (z1, ..., zn). Given z, the decoder then generates an output +sequence (y1, ..., ym) of symbols one element at a time. At each step the model is auto-regressive +[10], consuming the previously generated symbols as additional input when generating the next. + +The Transformer follows this overall architecture using stacked self-attention and point-wise, fully +connected layers for both the encoder and decoder, shown in the left and right halves of Figure 1, +respectively. diff --git a/benchmark/perfect_context/3.1 Encoder and Decoder Stacks.txt b/benchmark/perfect_context/3.1 Encoder and Decoder Stacks.txt new file mode 100644 index 0000000..c391380 --- /dev/null +++ b/benchmark/perfect_context/3.1 Encoder and Decoder Stacks.txt @@ -0,0 +1,15 @@ +Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two +sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- +wise fully connected feed-forward network. 
We employ a residual connection [ 11 ] around each of +the two sub-layers, followed by layer normalization [1]. That is, the output of each sub-layer is +LayerNorm(x + Sublayer(x)), where Sublayer(x) is the function implemented by the sub-layer +itself. To facilitate these residual connections, all sub-layers in the model, as well as the embedding +layers, produce outputs of dimension dmodel = 512. + +Decoder: The decoder is also composed of a stack of N = 6 identical layers. In addition to the two +sub-layers in each encoder layer, the decoder inserts a third sub-layer, which performs multi-head +attention over the output of the encoder stack. Similar to the encoder, we employ residual connections +around each of the sub-layers, followed by layer normalization. We also modify the self-attention +sub-layer in the decoder stack to prevent positions from attending to subsequent positions. This +masking, combined with fact that the output embeddings are offset by one position, ensures that the +predictions for position i can depend only on the known outputs at positions less than i. diff --git a/benchmark/perfect_context/3.1 Experimental Setup.txt b/benchmark/perfect_context/3.1 Experimental Setup.txt new file mode 100644 index 0000000..af2fc18 --- /dev/null +++ b/benchmark/perfect_context/3.1 Experimental Setup.txt @@ -0,0 +1,32 @@ +We explore chain-of-thought prompting for various language models on multiple benchmarks. +Benchmarks. We consider the following five math word problem benchmarks: (1) the GSM8K +benchmark of math word problems (Cobbe et al., 2021), (2) the SVAMP dataset of math word +problems with varying structures (Patel et al., 2021), (3) the ASDiv dataset of diverse math word +problems (Miao et al., 2020), (4) the AQuA dataset of algebraic word problems, and (5) the MAWPS +benchmark (Koncel-Kedziorski et al., 2016). Example problems are given in Appendix Table 12 +Standard prompting. 
For the baseline, we consider standard few-shot prompting, popularized by +Brown et al. (2020), in which a language model is given in-context exemplars of input–output pairs +before outputting a prediction for a test-time example. Exemplars are formatted as questions and +answers. The model gives the answer directly, as shown in Figure 1 (left). +Chain-of-thought prompting. Our proposed approach is to augment each exemplar in few-shot +prompting with a chain of thought for an associated answer, as illustrated in Figure 1 (right). As most +of the datasets only have an evaluation split, we manually composed a set of eight few-shot exemplars +with chains of thought for prompting—Figure 1 (right) shows one chain of thought exemplar, and the +full set of exemplars is given in Appendix Table 20. (These particular exemplars did not undergo +prompt engineering; robustness is studied in Section 3.4 and Appendix A.2.) To investigate whether +chain-of-thought prompting in this form can successfully elicit successful reasoning across a range of +math word problems, we used this single set of eight chain of thought exemplars for all benchmarks +except AQuA, which is multiple choice instead of free response. For AQuA, we used four exemplars +and solutions from the training set, as given in Appendix Table 21. +Language models. We evaluate five large language models. The first is GPT-3 (Brown et al., +2020), for which we use text-ada-001, text-babbage-001, text-curie-001, and text-davinci-002, which +presumably correspond to InstructGPT models of 350M, 1.3B, 6.7B, and 175B parameters (Ouyang +et al., 2022).The second is LaMDA (Thoppilan et al., 2022), which has models of 422M, 2B, 8B, +68B, and 137B parameters. The third is PaLM, which has models of 8B, 62B, and 540B parameters. +The fourth is UL2 20B (Tay et al., 2022), and the fifth is Codex (Chen et al., 2021, code-davinci-002 +in the OpenAI API). 
We sample from the models via greedy decoding (though follow-up work shows +chain-of-thought prompting can be improved by taking the majority final answer over many sampled +generations (Wang et al., 2022a)). For LaMDA, we report averaged results over five random seeds, +where each seed had a different randomly shuffled order of exemplars. As LaMDA experiments +did not show large variance among different seeds, to save compute we report results for a single +exemplar order for all other models. diff --git a/benchmark/perfect_context/3.2 Results.txt b/benchmark/perfect_context/3.2 Results.txt new file mode 100644 index 0000000..a0d2ebb --- /dev/null +++ b/benchmark/perfect_context/3.2 Results.txt @@ -0,0 +1,50 @@ +The strongest results of chain-of-thought prompting are summarized in Figure 4, with all experimental +outputs for each model collection, model size, and benchmark shown in Table 2 in the Appendix. +There are three key takeaways. First, Figure 4 shows that chain-of-thought prompting is an emergent +ability of model scale (Wei et al., 2022b). That is, chain-of-thought prompting does not positively +impact performance for small models, and only yields performance gains when used with models of +∼100B parameters. We qualitatively found that models of smaller scale produced fluent but illogical +chains of thought, leading to lower performance than standard prompting. +Second, chain-of-thought prompting has larger +performance gains for more-complicated prob- +lems. For instance, for GSM8K (the dataset +with the lowest baseline performance), perfor- +mance more than doubled for the largest GPT +and PaLM models. On the other hand, for Sin- +gleOp, the easiest subset of MAWPS which only +requires a single step to solve, performance im- +provements were either negative or very small +(see Appendix Table 3). 
+Third, chain-of-thought prompting via GPT-3 +175B and PaLM 540B compares favorably to +prior state of the art, which typically finetunes a +task-specific model on a labeled training dataset. +Figure 4 shows how PaLM 540B uses chain-of- +thought prompting to achieve new state of the art +on GSM8K, SVAMP, and MAWPS (though note +that standard prompting already passed the prior +best for SVAMP). On the other two datasets, +AQuA and ASDiv, PaLM with chain-of-thought +prompting reaches within 2% of the state of the +art (Appendix Table 2). +To better understand why chain-of-thought +prompting works, we manually examined model- +generated chains of thought by LaMDA 137B +for GSM8K. Of 50 random examples where the +model returned the correct final answer, all of +the generated chains of thought were also log- +ically and mathematically correct except two +that coincidentally arrived at the correct answer +(see Appendix D.1, and Table 8 for examples +of correct model-generated chains of thought). +We also randomly examined 50 random sam- +ples for which the model gave the wrong answer. +The summary of this analysis is that 46% of the +chains of thought were almost correct, barring +minor mistakes (calculator error, symbol map- +ping error, or one reasoning step missing), and that the other 54% of the chains of thought had major +errors in semantic understanding or coherence (see Appendix D.2). To provide a small insight into +why scaling improves chain-of-thought reasoning ability, we performed a similar analysis of errors +made by PaLM 62B and whether those errors were fixed by scaling to PaLM 540B. The summary +is that scaling PaLM to 540B fixes a large portion of one-step missing and semantic understanding +errors in the 62B model (see Appendix A.1). 
diff --git a/benchmark/perfect_context/3.2.2 Multi-Head Attention.txt b/benchmark/perfect_context/3.2.2 Multi-Head Attention.txt new file mode 100644 index 0000000..cc89ee9 --- /dev/null +++ b/benchmark/perfect_context/3.2.2 Multi-Head Attention.txt @@ -0,0 +1,21 @@ +Instead of performing a single attention function with $d_{model}$-dimensional keys, values and queries, +we found it beneficial to linearly project the queries, keys and values h times with different, learned +linear projections to $d_k$, $d_k$ and $d_v$ dimensions, respectively. On each of these projected versions of +queries, keys and values we then perform the attention function in parallel, yielding $d_v$-dimensional +output values. These are concatenated and once again projected, resulting in the final values, as +depicted in Figure 2. +Multi-head attention allows the model to jointly attend to information from different representation +subspaces at different positions. With a single attention head, averaging inhibits this. +$\mathrm{MultiHead}(Q, K, V) = \mathrm{Concat}(\mathrm{head}_1, \ldots, \mathrm{head}_h)W^O$ +where +$\mathrm{head}_i = \mathrm{Attention}(QW_i^Q, KW_i^K, VW_i^V)$ +Where the projections +are parameter matrices +$W_i^Q \in \mathbb{R}^{d_{model} \times d_k}$, +$W_i^K \in \mathbb{R}^{d_{model} \times d_k}$, +$W_i^V \in \mathbb{R}^{d_{model} \times d_v}$ +and +$W^O \in \mathbb{R}^{h d_v \times d_{model}}$. +In this work we employ h = 8 parallel attention layers, or heads. For each of these we use +$d_k = d_v = d_{model}/h = 64$. Due to the reduced dimension of each head, the total computational cost +is similar to that of single-head attention with full dimensionality. diff --git a/benchmark/perfect_context/3.4 Embeddings and Softmax.txt b/benchmark/perfect_context/3.4 Embeddings and Softmax.txt new file mode 100644 index 0000000..8f170c2 --- /dev/null +++ b/benchmark/perfect_context/3.4 Embeddings and Softmax.txt @@ -0,0 +1,5 @@ +Similarly to other sequence transduction models, we use learned embeddings to convert the input +tokens and output tokens to vectors of dimension $d_{model}$. 
We also use the usual learned linear transfor- +mation and softmax function to convert the decoder output to predicted next-token probabilities. In +our model, we share the same weight matrix between the two embedding layers and the pre-softmax +linear transformation, similar to [30]. In the embedding layers, we multiply those weights by $\sqrt{d_{model}}$. diff --git a/benchmark/perfect_context/3.4 Robustness of Chain of Thought.txt b/benchmark/perfect_context/3.4 Robustness of Chain of Thought.txt new file mode 100644 index 0000000..4aef88e --- /dev/null +++ b/benchmark/perfect_context/3.4 Robustness of Chain of Thought.txt @@ -0,0 +1,33 @@ +Sensitivity to exemplars is a key consideration of prompt- +ing approaches—for instance, varying the permutation of +few-shot exemplars can cause the accuracy of GPT-3 on +SST-2 to range from near chance (54.3%) to near state of +the art (93.4%) (Zhao et al., 2021). In this final subsec- +tion, we evaluate robustness to chains of thought written +by different annotators. In addition to the results above, +which used chains of thought written by an Annotator +A, two other co-authors of this paper (Annotators B and +C) independently wrote chains of thought for the same +few-shot exemplars (shown in Appendix H). Annotator A +also wrote another chain of thought that was more concise +than the original, following the style of solutions given in +Cobbe et al. (2021).1 +Figure 6 shows these results for LaMDA 137B on GSM8K +and MAWPS (ablation results for other datasets are given +in Appendix Table 6 / Table 7). Although there is variance +among different chain of thought annotations, as would be +expected when using exemplar-based prompting (Le Scao +and Rush, 2021; Reynolds and McDonell, 2021; Zhao +et al., 2021), all sets of chain of thought prompts outper- +form the standard baseline by a large margin. This result +implies that successful use of chain of thought does not +depend on a particular linguistic style. 
+To confirm that successful chain-of-thought prompting +works for other sets of exemplars, we also run experiments +with three sets of eight exemplars randomly sampled from the GSM8K training set, an independent +source (examples in this dataset already included reasoning steps like a chain of thought).2 Fig- +ure 6 shows that these prompts performed comparably with our manually written exemplars, also +substantially outperforming standard prompting. +In addition to robustness to annotators, independently-written chains of thought, different exemplars, +and various language models, we also find that chain-of-thought prompting for arithmetic reasoning +is robust to different exemplar orders and varying numbers of exemplars (see Appendix A.2). diff --git a/benchmark/perfect_context/3.5 Positional Encoding.txt b/benchmark/perfect_context/3.5 Positional Encoding.txt new file mode 100644 index 0000000..2db9e24 --- /dev/null +++ b/benchmark/perfect_context/3.5 Positional Encoding.txt @@ -0,0 +1,18 @@ +Since our model contains no recurrence and no convolution, in order for the model to make use of the +order of the sequence, we must inject some information about the relative or absolute position of the +tokens in the sequence. To this end, we add "positional encodings" to the input embeddings at the +bottoms of the encoder and decoder stacks. The positional encodings have the same dimension $d_{model}$ +as the embeddings, so that the two can be summed. There are many choices of positional encodings, +learned and fixed [9]. +In this work, we use sine and cosine functions of different frequencies: +$PE_{(pos,2i)} = \sin(pos/10000^{2i/d_{model}})$ +$PE_{(pos,2i+1)} = \cos(pos/10000^{2i/d_{model}})$ +where pos is the position and i is the dimension. That is, each dimension of the positional encoding +corresponds to a sinusoid. The wavelengths form a geometric progression from 2π to 10000 · 2π. 
We +chose this function because we hypothesized it would allow the model to easily learn to attend by +relative positions, since for any fixed offset k, $PE_{pos+k}$ can be represented as a linear function of +$PE_{pos}$. +We also experimented with using learned positional embeddings [9] instead, and found that the two +versions produced nearly identical results (see Table 3 row (E)). We chose the sinusoidal version +because it may allow the model to extrapolate to sequence lengths longer than the ones encountered +during training. diff --git a/benchmark/perfect_context/4 OUR METHOD.txt b/benchmark/perfect_context/4 OUR METHOD.txt new file mode 100644 index 0000000..35c39ea --- /dev/null +++ b/benchmark/perfect_context/4 OUR METHOD.txt @@ -0,0 +1,3 @@ +We describe the simple design of LoRA and its practical benefits. The principles outlined here apply +to any dense layers in deep learning models, though we only focus on certain weights in Transformer +language models in our experiments as the motivating use case. diff --git a/benchmark/perfect_context/4.1. Natural lighting.txt b/benchmark/perfect_context/4.1. Natural lighting.txt new file mode 100644 index 0000000..c4ef0e4 --- /dev/null +++ b/benchmark/perfect_context/4.1. Natural lighting.txt @@ -0,0 +1,7 @@ +Natural light is required for all permanent work stations and for restaurants. +As a general rule, preference should be given to the use of natural lighting; at least 80% of the +surface area of offices should have a daylight factor (according to the International Commission +on Illumination (CIE)) of 1.5% for façades without exterior obstructions, and 0.7% for other +façades16. +See Section I.2.4. Electricity and lighting, paragraph 1 on the conditions which apply to +artificial lighting. 
diff --git a/benchmark/perfect_context/5 Symbolic Reasoning.txt b/benchmark/perfect_context/5 Symbolic Reasoning.txt new file mode 100644 index 0000000..0f8ff3a --- /dev/null +++ b/benchmark/perfect_context/5 Symbolic Reasoning.txt @@ -0,0 +1,43 @@ +Our final experimental evaluation considers symbolic rea- +soning, which is simple for humans but potentially chal- +lenging for language models. We show that chain-of- +thought prompting not only enables language models to +perform symbolic reasoning tasks that are challenging in +the standard prompting setting, but also facilitates length +generalization to inference-time inputs longer than those +seen in the few-shot exemplars. +Tasks. We use the following two toy tasks. +• Last letter concatenation. This task asks the model +to concatenate the last letters of words in a name (e.g., +“Amy Brown” → “yn”). It is a more challenging version +of first letter concatenation, which language models can +already perform without chain of thought.3 We generate +full names by randomly concatenating names from the +top one-thousand first and last names from name census +data (https://namecensus.com/). +• Coin flip. This task asks the model to answer whether a +coin is still heads up after people either flip or don’t flip +the coin (e.g., “A coin is heads up. Phoebe flips the coin. +Osvaldo does not flip the coin. Is the coin still heads up?” +→ “no”). +As the construction of these symbolic reasoning tasks is +well-defined, for each task we consider an in-domain test +set for which examples had the same number of steps as +the training/few-shot exemplars, as well as an out-of-domain (OOD) test set, for which evaluation +examples had more steps than those in the exemplars. For last letter concatenation, the model only +sees exemplars of names with two words, and then performs last letter concatenation on names with 3 +and 4 words.4 We do the same for the number of potential flips in the coin flip task. 
Our experimental +setup uses the same methods and models as in the prior two sections. We again manually compose +chains of thought for the few-shot exemplars for each task, which are given in Figure 3 +Results. The results of these in-domain and OOD evaluations are shown in Figure 8 for PaLM, +with results for LaMDA shown in Appendix Table 5. With PaLM 540B, chain-of-thought prompting +leads to almost 100% solve rates (note that standard prompting already solves coin flip with PaLM +540, though not for LaMDA 137B). Note that these in-domain evaluations are “toy tasks” in the +sense that perfect solution structures are already provided by the chains of thought in the few-shot +exemplars; all the model has to do is repeat the same steps with the new symbols in the test-time +example. And yet, small models still fail—the ability to perform abstract manipulations on unseen +symbols for these three tasks only arises at the scale of 100B model parameters. +As for the OOD evaluations, standard prompting fails for both tasks. With chain-of-thought prompting, +language models achieve upward scaling curves (though performance is lower than in the in-domain +setting). Hence, chain-of-thought prompting facilitates length generalization beyond seen chains of +thought for language models of sufficient scale. diff --git a/benchmark/perfect_context/5.2 Hardware and Schedule.txt b/benchmark/perfect_context/5.2 Hardware and Schedule.txt new file mode 100644 index 0000000..077d3c0 --- /dev/null +++ b/benchmark/perfect_context/5.2 Hardware and Schedule.txt @@ -0,0 +1,5 @@ +We trained our models on one machine with 8 NVIDIA P100 GPUs. For our base models using +the hyperparameters described throughout the paper, each training step took about 0.4 seconds. We +trained the base models for a total of 100,000 steps or 12 hours. For our big models,(described on the +bottom line of table 3), step time was 1.0 seconds. The big models were trained for 300,000 steps +(3.5 days). 
diff --git a/benchmark/perfect_context/5.2. Thread Hierarchy.txt b/benchmark/perfect_context/5.2. Thread Hierarchy.txt new file mode 100644 index 0000000..464117c --- /dev/null +++ b/benchmark/perfect_context/5.2. Thread Hierarchy.txt @@ -0,0 +1,75 @@ +For convenience, threadIdx is a 3-component vector, so that threads can be identified using a one- +dimensional, two-dimensional, or three-dimensional thread index, forming a one-dimensional, two- +dimensional, or three-dimensional block of threads, called a thread block. This provides a natural way +to invoke computation across the elements in a domain such as a vector, matrix, or volume. +The index of a thread and its thread ID relate to each other in a straightforward way: For a one- +dimensional block, they are the same; for a two-dimensional block of size (Dx, Dy), the thread ID of +a thread of index (x, y) is (x + y Dx); for a three-dimensional block of size (Dx, Dy, Dz), the thread ID of a +thread of index (x, y, z) is (x + y Dx + z Dx Dy). +As an example, the following code adds two matrices A and B of size NxN and stores the result into +matrix C. +∕∕ Kernel definition +__global__ void MatAdd(float A[N][N], float B[N][N], +float C[N][N]) +{ +int i = threadIdx.x; +int j = threadIdx.y; +C[i][j] = A[i][j] + B[i][j]; +} +int main() +{ +... +∕∕ Kernel invocation with one block of N * N * 1 threads +int numBlocks = 1; +dim3 threadsPerBlock(N, N); +MatAdd<<>>(A, B, C); +... +} +There is a limit to the number of threads per block, since all threads of a block are expected to reside +on the same streaming multiprocessor core and must share the limited memory resources of that +core. On current GPUs, a thread block may contain up to 1024 threads. +However, a kernel can be executed by multiple equally-shaped thread blocks, so that the total number +of threads is equal to the number of threads per block times the number of blocks. 
+Blocks are organized into a one-dimensional, two-dimensional, or three-dimensional grid of thread +blocks as illustrated by Figure 4. The number of thread blocks in a grid is usually dictated by the size +of the data being processed, which typically exceeds the number of processors in the system. +The number of threads per block and the number of blocks per grid specified in the <<<...>>> syntax +can be of type int or dim3. Two-dimensional blocks or grids can be specified as in the example above. +Each block within the grid can be identified by a one-dimensional, two-dimensional, or three- +dimensional unique index accessible within the kernel through the built-in blockIdx variable. The +dimension of the thread block is accessible within the kernel through the built-in blockDim variable. +Extending the previous MatAdd() example to handle multiple blocks, the code becomes as follows. +∕∕ Kernel definition +__global__ void MatAdd(float A[N][N], float B[N][N], +float C[N][N]) +{ +int i = blockIdx.x * blockDim.x + threadIdx.x; +int j = blockIdx.y * blockDim.y + threadIdx.y; +if (i < N && j < N) +C[i][j] = A[i][j] + B[i][j]; +} +int main() +{ +... +∕∕ Kernel invocation +dim3 threadsPerBlock(16, 16); +dim3 numBlocks(N ∕ threadsPerBlock.x, N ∕ threadsPerBlock.y); +MatAdd<<>>(A, B, C); +... +} +A thread block size of 16x16 (256 threads), although arbitrary in this case, is a common choice. The +grid is created with enough blocks to have one thread per matrix element as before. For simplicity, +this example assumes that the number of threads per grid in each dimension is evenly divisible by the +number of threads per block in that dimension, although that need not be the case. +Thread blocks are required to execute independently. It must be possible to execute blocks in any +order, in parallel or in series. 
This independence requirement allows thread blocks to be scheduled in +any order and across any number of cores as illustrated by Figure 3, enabling programmers to write +code that scales with the number of cores. +Threads within a block can cooperate by sharing data through some shared memory and by synchroniz- +ing their execution to coordinate memory accesses. More precisely, one can specify synchronization +points in the kernel by calling the __syncthreads() intrinsic function; __syncthreads() acts as a +barrier at which all threads in the block must wait before any is allowed to proceed. Shared Memory +gives an example of using shared memory. In addition to __syncthreads(), the Cooperative Groups +API provides a rich set of thread-synchronization primitives. +For efficient cooperation, shared memory is expected to be a low-latency memory near each processor +core (much like an L1 cache) and __syncthreads() is expected to be lightweight. diff --git a/benchmark/perfect_context/5.3 Optimizer.txt b/benchmark/perfect_context/5.3 Optimizer.txt new file mode 100644 index 0000000..ac15d4a --- /dev/null +++ b/benchmark/perfect_context/5.3 Optimizer.txt @@ -0,0 +1,7 @@ +We used the Adam optimizer [20] with $\beta_1 = 0.9$, $\beta_2 = 0.98$ and $\epsilon = 10^{-9}$. We varied the learning +rate over the course of training, according to the formula: +$lrate = d_{model}^{-0.5}$ +$\cdot \min(step\_num^{-0.5},\ step\_num \cdot warmup\_steps^{-1.5})$ (3) +This corresponds to increasing the learning rate linearly for the first warmup_steps training steps, +and decreasing it thereafter proportionally to the inverse square root of the step number. We used +warmup_steps = 4000. 
diff --git a/benchmark/perfect_context/5.4 Regularization.txt b/benchmark/perfect_context/5.4 Regularization.txt new file mode 100644 index 0000000..bf0df4b --- /dev/null +++ b/benchmark/perfect_context/5.4 Regularization.txt @@ -0,0 +1,7 @@ +We employ three types of regularization during training: +Residual Dropout We apply dropout [ 33] to the output of each sub-layer, before it is added to the +sub-layer input and normalized. In addition, we apply dropout to the sums of the embeddings and the +positional encodings in both the encoder and decoder stacks. For the base model, we use a rate of +Pdrop = 0.1. +Label Smoothing During training, we employed label smoothing of value ϵls = 0.1 [ 36 ]. This +hurts perplexity, as the model learns to be more unsure, but improves accuracy and BLEU score diff --git a/benchmark/perfect_context/5.5 SCALING UP TO GPT-3 175B.txt b/benchmark/perfect_context/5.5 SCALING UP TO GPT-3 175B.txt new file mode 100644 index 0000000..7d2962a --- /dev/null +++ b/benchmark/perfect_context/5.5 SCALING UP TO GPT-3 175B.txt @@ -0,0 +1,11 @@ +As a final stress test for LoRA, we scale up to GPT-3 with 175 billion parameters. Due to the high +training cost, we only report the typical standard deviation for a given task over random seeds, as +opposed to providing one for every entry. See Section D.4 for details on the hyperparameters used. +As shown in Table 4, LoRA matches or exceeds the fine-tuning baseline on all three datasets. Note +that not all methods benefit monotonically from having more trainable parameters, as shown in Fig- +ure 2. We observe a significant performance drop when we use more than 256 special tokens for +prefix-embedding tuning or more than 32 special tokens for prefix-layer tuning. This corroborates +similar observations in Li & Liang (2021). 
While a thorough investigation into this phenomenon +is out-of-scope for this work, we suspect that having more special tokens causes the input distri- +bution to shift further away from the pre-training data distribution. Separately, we investigate the +performance of different adaptation approaches in the low-data regime in Section F.3 diff --git a/benchmark/perfect_context/6.1.1. Compilation Workflow.txt b/benchmark/perfect_context/6.1.1. Compilation Workflow.txt new file mode 100644 index 0000000..6401a2e --- /dev/null +++ b/benchmark/perfect_context/6.1.1. Compilation Workflow.txt @@ -0,0 +1,30 @@ +6.1.1.1 Offline Compilation +Source files compiled with nvcc can include a mix of host code (i.e., code that executes on the host) +and device code (i.e., code that executes on the device). nvcc’s basic workflow consists in separating +device code from host code and then: +▶ compiling the device code into an assembly form (PTX code) and/or binary form (cubin object), +▶ and modifying the host code by replacing the <<<...>>> syntax introduced in Kernels (and de- +scribed in more details in Execution Configuration) by the necessary CUDA runtime function calls +to load and launch each compiled kernel from the PTX code and/or cubin object. +The modified host code is output either as C++ code that is left to be compiled using another tool or +as object code directly by letting nvcc invoke the host compiler during the last compilation stage. +Applications can then: +▶ Either link to the compiled host code (this is the most common case), +▶ Or ignore the modified host code (if any) and use the CUDA driver API (see Driver API) to load and +execute the PTX code or cubin object. +6.1.1.2 Just-in-Time Compilation +Any PTX code loaded by an application at runtime is compiled further to binary code by the device +driver. This is called just-in-time compilation. 
Just-in-time compilation increases application load time, +but allows the application to benefit from any new compiler improvements coming with each new +device driver. It is also the only way for applications to run on devices that did not exist at the time the +application was compiled, as detailed in Application Compatibility. +When the device driver just-in-time compiles some PTX code for some application, it automatically +caches a copy of the generated binary code in order to avoid repeating the compilation in subsequent +invocations of the application. The cache - referred to as compute cache - is automatically invalidated +when the device driver is upgraded, so that applications can benefit from the improvements in the +new just-in-time compiler built into the device driver. +Environment variables are available to control just-in-time compilation as described in CUDA Environ- +ment Variables +As an alternative to using nvcc to compile CUDA C++ device code, NVRTC can be used to compile +CUDA C++ device code to PTX at runtime. NVRTC is a runtime compilation library for CUDA C++; more +information can be found in the NVRTC User guide. diff --git a/benchmark/perfect_context/ACTION PHASE.txt b/benchmark/perfect_context/ACTION PHASE.txt new file mode 100644 index 0000000..cc967b1 --- /dev/null +++ b/benchmark/perfect_context/ACTION PHASE.txt @@ -0,0 +1,28 @@ +This is the main phase of the game in which players take +their actions to build Locations, use their Clan action +pawn to activate Action tiles, and use actions of different +Locations. +Starting with the First player and continuing clockwise, +each player performs one action at a time. Each player can +take any available action or pass. +AVAILABLE ACTIONS: +> Build a Location +> Use a Clan action pawn +> Raid an opponent +> Use an action from a Location + +All actions are described in detail in a separate chapter +(see pages 9-11). 
+> Once a player passes in the Action phase they cannot +perform any additional actions in the current round. +Neither can they be targeted by the actions of other +players. For example, a player who has passed cannot +have any of his Locations Raided. +> There is no limit to the number, type, or order of +actions a player may take during the Action phase, so +long as they perform them one action at a time. +> The Action phase continues until all players have +passed. If any player has reached or passed the 25 +during this phase, the last round is triggered and the +game will end upon the completion of the Expedition +phase of this round. diff --git a/benchmark/perfect_context/CARBON MONOXIDE DETECTION AND VENTING.txt b/benchmark/perfect_context/CARBON MONOXIDE DETECTION AND VENTING.txt new file mode 100644 index 0000000..6bfc1dc --- /dev/null +++ b/benchmark/perfect_context/CARBON MONOXIDE DETECTION AND VENTING.txt @@ -0,0 +1,13 @@ +Operating safety +A carbon monoxide (CO) detector must be installed in closed car parks (indoor or +underground) in accordance with the following requirements: +- the number of carbon monoxide detectors on each level must be sufficient to cover the +entire area of the car park; +- the system must allow automatic control of blower and/or extraction fans and audible and +light indications, on the basis of thresholds stipulated by the Commission; +- the carbon monoxide detection control panel must be fitted with a stand-alone power +source in the form of an integrated battery (providing at least one hour of power). +Maintenance and management +Remote management +The carbon monoxide detection control panel must be linked to the centralised technical +management system GTC (control panel alarms and malfunctions). 
diff --git a/benchmark/perfect_context/CARD AND TILE COSTS.txt b/benchmark/perfect_context/CARD AND TILE COSTS.txt new file mode 100644 index 0000000..8395847 --- /dev/null +++ b/benchmark/perfect_context/CARD AND TILE COSTS.txt @@ -0,0 +1,39 @@ +Coins +In order to play them, certain cards have a Coin cost that you must pay to the reserve. + +Skills +In order to play them, tiles and the majority of cards require you to have one +or more Skills (see page 5) in your play area. +If you do not have the required Skills, you may pay 1 Coin to the reserve +per missing Skill symbol. + +Notes: +• There is no limit to the number of Skills you may pay for, to the reserve, on your turn. +• If a card does not require a Skill or a Coin, it has no cost, so you may play it for free. + +Landmark tiles have an additional Coin cost equal to the number +of your Fortress pawns already on the central board. +Therefore, the additional cost of your first tile is 0 Coins. + +Chaining +Starting in chapter 2, certain cards may be played for free through their chaining symbol. +If you have, in your play area, a card with a matching symbol in its top-right corner, you may play the card for free, +without having the required Skills. +Note: If you do not have the matching chaining symbol for a card, you may still play it normally by paying its Skill +and/or Coin cost. + +Example: +In chapter 1, you play +this card for free 1 . +In addition to its effect +(see page 5), it has a chaining +symbol 2 . + +In chapter 2, you may play this +card for free since you have +the matching chaining symbol 3 +on one of your played cards. +Otherwise, you would need +to have the required Skills +(or pay 1 Coin per missing Skill +symbol) 4 .
diff --git a/benchmark/perfect_context/CARD AND TILE EFFECTS.txt b/benchmark/perfect_context/CARD AND TILE EFFECTS.txt new file mode 100644 index 0000000..7cde5ec --- /dev/null +++ b/benchmark/perfect_context/CARD AND TILE EFFECTS.txt @@ -0,0 +1,31 @@ +Grey cards provide Skills that let you play other cards and tiles in your play area. +You gain 1 Skill per symbol shown. Each symbol may only be used once per turn, on each of your turns. +Ruse Strength Courage Knowledge Leadership +When multiple Skills are separated by a , you may only use one of them per turn (you choose). + +Yellow cards immediately provide Coins that you will be able to spend to play other cards and tiles +in your play area. +Take, from the reserve, the number of Coins shown in the symbol. + +Blue cards immediately let you advance on the Quest of the Ring track. +Move your character along the Quest of the Ring track, one space per Ring symbol. + +Green cards represent the Races of Middle-earth with whom you may ally: +Elves Ents Hobbits Humans Dwarves Wizards + +Red cards immediately let you place Units in the regions of Middle-earth (see page 7). +Choose one of the two regions shown by the banners and place all Units in the chosen region. +Number of Units to place +Choice of regions where you may place Units + +Purple cards (only available in chapter 3) immediately let you complete various maneuvers. +Move 1 of your Units +to an adjacent region. +Your opponent loses +1 Coin. +Remove 1 enemy Unit +from any region + +Landmark tiles immediately let you place Fortresses in regions of Middle-earth (see page 7) and benefit +from unique effects (see the Player Aid). 
+Region where you may place a Fortress diff --git a/benchmark/perfect_context/CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK AVAILABLE?.txt b/benchmark/perfect_context/CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK AVAILABLE?.txt new file mode 100644 index 0000000..2e75747 --- /dev/null +++ b/benchmark/perfect_context/CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK AVAILABLE?.txt @@ -0,0 +1,35 @@ +IN ADDITION TO DECIDING HOW “OPEN” YOU +want to make your work, you will also need to decide +where you will make your work openly accessible. This +involves first deciding which open access model (or +models) you will use to disseminate your work. Then, +you need to decide what publication venue (or venues) +within that model best suits your needs. +Open access models are generally divided into two +categories: “Gold Open Access” and “Green Open Access.” +Gold Open Access describes the model by which an open +access publisher makes your work openly accessible. If +you opt to use the Gold Open Access model, you will then +need to decide what open access publisher provides the +best venue for your work. In contrast, Green Open Access +(also called “self-archiving”) describes the model by +which you as an author make your work openly +64 Understanding Open Access +accessible. If you opt to use the Green Open Access +model, you will then need to select the best online +venue for your work. Some options include your own +website, your departmental website, or an open access +repository. +Gold and Green Open Access are not mutually +exclusive. An author can publish a work with an open +access publisher and upload the same work to an +open access repository or personal website, and vice +versa (depending on the terms of their publishing +agreement). 
+THIS CHAPTER: +• Describes the key features of Gold Open Access +• Presents factors to consider when selecting an +open access publisher +• Describes the key features of Green Open Access +• Presents factors to consider when deciding where +to self-archive a work. diff --git a/benchmark/perfect_context/CHAPTER OVERVIEW.txt b/benchmark/perfect_context/CHAPTER OVERVIEW.txt new file mode 100644 index 0000000..e6893bc --- /dev/null +++ b/benchmark/perfect_context/CHAPTER OVERVIEW.txt @@ -0,0 +1,43 @@ +Preparing a chapter +At the start of each chapter (1, 2, then 3), place cards from the corresponding chapter in the central play area, +following the diagram below (reminder on the sides of the box). Be careful, since certain cards are placed faceup +and others facedown. Place the 3 remaining cards, facedown, in the discard. + +Turn overview +The Sauron player begins the game, then both players take turns, until the end of the game. +On your turn, you may either take a Chapter card or take a Landmark tile. + +A. Take a Chapter card +From the central play area, choose an available card, +meaning one that is not partially covered by any other cards. +Then, play it in front of you or discard it. + +Play the card in front of you +Pay the card cost, if any (see page 4), and place it +in your play area. You may immediately benefit +from its effect (see page 5). +Note: Stack cards in front of you by color, making sure +you can still see their effects. +Discard the card +Place the card, facedown, in the discard +and take as many Coins from the reserve +as the current chapter: +Chapter 1: 1 Coin +Chapter 2: 2 Coins +Chapter 3: 3 Coins + +Finally, end your turn by revealing any cards that are now available. + +B. Take a Landmark tile +Choose one of the faceup tiles. Pay its cost (see page 4) and place it in your play area. +Immediately place a Fortress pawn on the corresponding region of the central board and benefit +from its other effects (see the Player Aid). 
+Finally, end your turn without revealing a new tile. + +End of a chapter +A chapter ends once the final card of this chapter has been taken. +Set up the cards for the next chapter according to the corresponding diagram and reveal new Landmark tiles +until there are, if possible, 3 faceup. +Then continue the game as normal. +Note: Since you alternate taking turns, the player who ends a chapter does not begin the next one, unless they are using +a “Take another turn” effect (see the Player Aid). diff --git a/benchmark/perfect_context/CLEANUP PHASE.txt b/benchmark/perfect_context/CLEANUP PHASE.txt new file mode 100644 index 0000000..94b0a65 --- /dev/null +++ b/benchmark/perfect_context/CLEANUP PHASE.txt @@ -0,0 +1,14 @@ +NOTE: Skip the Cleanup phase in the final round. +In this phase: +> Players take all Workers from their Clan tile back to their +supply. +> Players unexhaust all cards (rotate them to their initial position) +in front of them. +> Players take back their Clan action pawns from Action tiles. +> Discard any remaining, face-up Island cards and reveal new ones. +> Pass the First player marker to the next player in clockwise order. +> If no one has reached or passed the 25 on the scoreboard +during the Action phase, begin a new round. If anyone reached +25 or more during the Expedition phase, the next round will +be the last one. +IMPORTANT: Goods are never discarded at the end of the round diff --git a/benchmark/perfect_context/CONQUERING MIDDLE-EARTH.txt b/benchmark/perfect_context/CONQUERING MIDDLE-EARTH.txt new file mode 100644 index 0000000..4dc2810 --- /dev/null +++ b/benchmark/perfect_context/CONQUERING MIDDLE-EARTH.txt @@ -0,0 +1,14 @@ +When you place or move one or more Units, two situations are possible: +If no enemy Unit is present: nothing happens. +If one or more enemy Units are present: trigger a conflict. Each player removes one of their Units +and places it back in front of them. 
Repeat this until at least one player has no more Units in the region. +Note: An enemy Fortress does not trigger conflicts and does not prevent you from placing your Units in its region. +Therefore, it is possible for both players to be present in the same region. + +When you complete multiple movements, you may move the same Unit multiple times, or split your movement +between multiple Units. For each movement, move a Unit to an adjacent region (one with a connection). +You must complete each movement independently, resolving any conflict triggered, one at a time. + +Example: You play a Purple card that provides 3 movements 1 . The first lets you move a Unit from Enedwaith to Rohan 2 . +Since there is an enemy Unit present, you trigger a conflict and each player removes their Unit. You then use +your second and third movements to send another Unit from Enedwaith to Mordor, passing through Rohan 3 diff --git a/benchmark/perfect_context/Classification of general-purpose AI models as general-purpose AI models with systemic risk.txt b/benchmark/perfect_context/Classification of general-purpose AI models as general-purpose AI models with systemic risk.txt new file mode 100644 index 0000000..43cd859 --- /dev/null +++ b/benchmark/perfect_context/Classification of general-purpose AI models as general-purpose AI models with systemic risk.txt @@ -0,0 +1,13 @@ +1. A general-purpose AI model shall be classified as a general-purpose AI model with systemic risk if it meets any of the +following conditions: +(a) it has high impact capabilities evaluated on the basis of appropriate technical tools and methodologies, including +indicators and benchmarks; +(b) based on a decision of the Commission, ex officio or following a qualified alert from the scientific panel, it has +capabilities or an impact equivalent to those set out in point (a) having regard to the criteria set out in Annex XIII. +2. 
A general-purpose AI model shall be presumed to have high impact capabilities pursuant to paragraph 1, point (a), +when the cumulative amount of computation used for its training measured in floating point operations is greater than +10^25. +3. The Commission shall adopt delegated acts in accordance with Article 97 to amend the thresholds listed in +paragraphs 1 and 2 of this Article, as well as to supplement benchmarks and indicators in light of evolving technological +developments, such as algorithmic improvements or increased hardware efficiency, when necessary, for these thresholds to +reflect the state of the art. diff --git a/benchmark/perfect_context/Codes of practice.txt b/benchmark/perfect_context/Codes of practice.txt new file mode 100644 index 0000000..be7daa0 --- /dev/null +++ b/benchmark/perfect_context/Codes of practice.txt @@ -0,0 +1,39 @@ + ELI: http://data.europa.eu/eli/reg/2024/1689/oj +(a) the means to ensure that the information referred to in Article 53(1), points (a) and (b), is kept up to date in light of +market and technological developments; +(b) the adequate level of detail for the summary about the content used for training; +(c) the identification of the type and nature of the systemic risks at Union level, including their sources, where appropriate; +(d) the measures, procedures and modalities for the assessment and management of the systemic risks at Union level, +including the documentation thereof, which shall be proportionate to the risks, take into consideration their severity +and probability and take into account the specific challenges of tackling those risks in light of the possible ways in +which such risks may emerge and materialise along the AI value chain. +3. The AI Office may invite all providers of general-purpose AI models, as well as relevant national competent +authorities, to participate in the drawing-up of codes of practice.
Civil society organisations, industry, academia and other +relevant stakeholders, such as downstream providers and independent experts, may support the process. +4. The AI Office and the Board shall aim to ensure that the codes of practice clearly set out their specific objectives and +contain commitments or measures, including key performance indicators as appropriate, to ensure the achievement of +those objectives, and that they take due account of the needs and interests of all interested parties, including affected +persons, at Union level. +5. The AI Office shall aim to ensure that participants to the codes of practice report regularly to the AI Office on the +implementation of the commitments and the measures taken and their outcomes, including as measured against the key +performance indicators as appropriate. Key performance indicators and reporting commitments shall reflect differences in +size and capacity between various participants. +6. The AI Office and the Board shall regularly monitor and evaluate the achievement of the objectives of the codes of +practice by the participants and their contribution to the proper application of this Regulation. The AI Office and the Board +shall assess whether the codes of practice cover the obligations provided for in Articles 53 and 55, and shall regularly +monitor and evaluate the achievement of their objectives. They shall publish their assessment of the adequacy of the codes +of practice. +The Commission may, by way of an implementing act, approve a code of practice and give it a general validity within the +Union. That implementing act shall be adopted in accordance with the examination procedure referred to in Article 98(2). +7. The AI Office may invite all providers of general-purpose AI models to adhere to the codes of practice. 
For providers +of general-purpose AI models not presenting systemic risks this adherence may be limited to the obligations provided for in +Article 53, unless they declare explicitly their interest to join the full code. +8. The AI Office shall, as appropriate, also encourage and facilitate the review and adaptation of the codes of practice, in +particular in light of emerging standards. The AI Office shall assist in the assessment of available standards. +9. Codes of practice shall be ready at the latest by 2 May 2025. The AI Office shall take the necessary steps, including +inviting providers pursuant to paragraph 7. +If, by 2 August 2025, a code of practice cannot be finalised, or if the AI Office deems it is not adequate following its +assessment under paragraph 6 of this Article, the Commission may provide, by means of implementing acts, common rules +for the implementation of the obligations provided for in Articles 53 and 55, including the issues set out in paragraph 2 of +this Article. Those implementing acts shall be adopted in accordance with the examination procedure referred to in Article +98(2). diff --git a/benchmark/perfect_context/Compliant AI systems which present a risk.txt b/benchmark/perfect_context/Compliant AI systems which present a risk.txt new file mode 100644 index 0000000..0efeef3 --- /dev/null +++ b/benchmark/perfect_context/Compliant AI systems which present a risk.txt @@ -0,0 +1,20 @@ +1. 
Where, having performed an evaluation under Article 79, after consulting the relevant national public authority +referred to in Article 77(1), the market surveillance authority of a Member State finds that although a high-risk AI system +complies with this Regulation, it nevertheless presents a risk to the health or safety of persons, to fundamental rights, or to +other aspects of public interest protection, it shall require the relevant operator to take all appropriate measures to ensure +that the AI system concerned, when placed on the market or put into service, no longer presents that risk without undue +delay, within a period it may prescribe. +EN OJ L, 12.7.2024 +108/144 ELI: http://data.europa.eu/eli/reg/2024/1689/oj +2. The provider or other relevant operator shall ensure that corrective action is taken in respect of all the AI systems +concerned that it has made available on the Union market within the timeline prescribed by the market surveillance +authority of the Member State referred to in paragraph 1. +3. The Member States shall immediately inform the Commission and the other Member States of a finding under +paragraph 1. That information shall include all available details, in particular the data necessary for the identification of the +AI system concerned, the origin and the supply chain of the AI system, the nature of the risk involved and the nature and +duration of the national measures taken. +4. The Commission shall without undue delay enter into consultation with the Member States concerned and the +relevant operators, and shall evaluate the national measures taken. On the basis of the results of that evaluation, the +Commission shall decide whether the measure is justified and, where necessary, propose other appropriate measures. +5. The Commission shall immediately communicate its decision to the Member States concerned and to the relevant +operators. It shall also inform the other Member States. 
diff --git a/benchmark/perfect_context/Data and data governance.txt b/benchmark/perfect_context/Data and data governance.txt new file mode 100644 index 0000000..ad49e6a --- /dev/null +++ b/benchmark/perfect_context/Data and data governance.txt @@ -0,0 +1,49 @@ +1. High-risk AI systems which make use of techniques involving the training of AI models with data shall be developed +on the basis of training, validation and testing data sets that meet the quality criteria referred to in paragraphs 2 to 5 +whenever such data sets are used. +2. Training, validation and testing data sets shall be subject to data governance and management practices appropriate +for the intended purpose of the high-risk AI system. Those practices shall concern in particular: +(a) the relevant design choices; +(b) data collection processes and the origin of data, and in the case of personal data, the original purpose of the data +collection; +(c) relevant data-preparation processing operations, such as annotation, labelling, cleaning, updating, enrichment and +aggregation; +(d) the formulation of assumptions, in particular with respect to the information that the data are supposed to measure and +represent; +(e) an assessment of the availability, quantity and suitability of the data sets that are needed; +(f) examination in view of possible biases that are likely to affect the health and safety of persons, have a negative impact +on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence +inputs for future operations; +(g) appropriate measures to detect, prevent and mitigate possible biases identified according to point (f); +(h) the identification of relevant data gaps or shortcomings that prevent compliance with this Regulation, and how those +gaps and shortcomings can be addressed. +3. 
Training, validation and testing data sets shall be relevant, sufficiently representative, and to the best extent possible, +free of errors and complete in view of the intended purpose. They shall have the appropriate statistical properties, including, +where applicable, as regards the persons or groups of persons in relation to whom the high-risk AI system is intended to be +used. Those characteristics of the data sets may be met at the level of individual data sets or at the level of a combination +thereof. +4. Data sets shall take into account, to the extent required by the intended purpose, the characteristics or elements that +are particular to the specific geographical, contextual, behavioural or functional setting within which the high-risk AI +system is intended to be used. +OJ L, 12.7.2024 EN +ELI: http://data.europa.eu/eli/reg/2024/1689/oj 57/144 +5. To the extent that it is strictly necessary for the purpose of ensuring bias detection and correction in relation to the +high-risk AI systems in accordance with paragraph (2), points (f) and (g) of this Article, the providers of such systems may +exceptionally process special categories of personal data, subject to appropriate safeguards for the fundamental rights and +freedoms of natural persons. 
In addition to the provisions set out in Regulations (EU) 2016/679 and (EU) 2018/1725 and +Directive (EU) 2016/680, all the following conditions must be met in order for such processing to occur: +(a) the bias detection and correction cannot be effectively fulfilled by processing other data, including synthetic or +anonymised data; +(b) the special categories of personal data are subject to technical limitations on the re-use of the personal data, and +state-of-the-art security and privacy-preserving measures, including pseudonymisation; +(c) the special categories of personal data are subject to measures to ensure that the personal data processed are secured, +protected, subject to suitable safeguards, including strict controls and documentation of the access, to avoid misuse and +ensure that only authorised persons have access to those personal data with appropriate confidentiality obligations; +(d) the special categories of personal data are not to be transmitted, transferred or otherwise accessed by other parties; +(e) the special categories of personal data are deleted once the bias has been corrected or the personal data has reached the +end of its retention period, whichever comes first; +(f) the records of processing activities pursuant to Regulations (EU) 2016/679 and (EU) 2018/1725 and Directive (EU) +2016/680 include the reasons why the processing of special categories of personal data was strictly necessary to detect +and correct biases, and why that objective could not be achieved by processing other data. +6. For the development of high-risk AI systems not using techniques involving the training of AI models, paragraphs 2 +to 5 apply only to the testing data sets. 
diff --git a/benchmark/perfect_context/END OF THE GAME.txt b/benchmark/perfect_context/END OF THE GAME.txt new file mode 100644 index 0000000..9ba198c --- /dev/null +++ b/benchmark/perfect_context/END OF THE GAME.txt @@ -0,0 +1,15 @@ +There are three immediate victory conditions: +Quest of the Ring +For the Fellowship: If Frodo and Sam reach Mount Doom, they destroy the One Ring and you immediately +win the game. +For Sauron: If the Nazgûl catch Frodo and Sam, they seize the One Ring and you immediately win the game. +Support of the Races +If you gather 6 different Race symbols on your Green cards, you rally the support of the Races +of Middle-earth and immediately win the game. +Note: The Eagle symbol, present on one Alliance token, is an additional Race symbol that counts as 1 of the 6 required +symbols for the Support of the Races victory. +Conquering Middle-earth +If you are present in all 7 regions (with a Fortress and/or at least 1 Unit), you dominate Middle-earth +and immediately win the game. +If none of these three victory conditions are achieved by the end of chapter 3, the player who is present in the most +regions of Middle-earth (with a Fortress and/or at least 1 Unit) wins the game. In case of tie, share the victory. diff --git a/benchmark/perfect_context/EU declaration of conformity.txt b/benchmark/perfect_context/EU declaration of conformity.txt new file mode 100644 index 0000000..58660d6 --- /dev/null +++ b/benchmark/perfect_context/EU declaration of conformity.txt @@ -0,0 +1,20 @@ +1. The provider shall draw up a written machine readable, physical or electronically signed EU declaration of conformity +for each high-risk AI system, and keep it at the disposal of the national competent authorities for 10 years after the +high-risk AI system has been placed on the market or put into service. The EU declaration of conformity shall identify the +high-risk AI system for which it has been drawn up. 
A copy of the EU declaration of conformity shall be submitted to the +relevant national competent authorities upon request. +EN OJ L, 12.7.2024 +80/144 ELI: http://data.europa.eu/eli/reg/2024/1689/oj +2. The EU declaration of conformity shall state that the high-risk AI system concerned meets the requirements set out in +Section 2. The EU declaration of conformity shall contain the information set out in Annex V, and shall be translated into +a language that can be easily understood by the national competent authorities of the Member States in which the high-risk +AI system is placed on the market or made available. +3. Where high-risk AI systems are subject to other Union harmonisation legislation which also requires an EU +declaration of conformity, a single EU declaration of conformity shall be drawn up in respect of all Union law applicable to +the high-risk AI system. The declaration shall contain all the information required to identify the Union harmonisation +legislation to which the declaration relates. +4. By drawing up the EU declaration of conformity, the provider shall assume responsibility for compliance with the +requirements set out in Section 2. The provider shall keep the EU declaration of conformity up-to-date as appropriate. +5. The Commission is empowered to adopt delegated acts in accordance with Article 97 in order to amend Annex V by +updating the content of the EU declaration of conformity set out in that Annex, in order to introduce elements that become +necessary in light of technical progress. diff --git a/benchmark/perfect_context/EXPEDITION PHASE.txt b/benchmark/perfect_context/EXPEDITION PHASE.txt new file mode 100644 index 0000000..fd49a78 --- /dev/null +++ b/benchmark/perfect_context/EXPEDITION PHASE.txt @@ -0,0 +1,24 @@ +One after another, starting from the first Ship token placed on the arrow, +players choose a single Island to either Pillage or Conquer for each one of +their Ships. 
+A player with just a Ship on the Expedition board (no Raze token or Fish +assigned) can choose any face-up Nearby Island, or draw from the top of the +Nearby Island deck. To be able to take any Distant Island card, either face- +up or from the top of the deck, the Ship performing that expedition must have +had a fish assigned to it. Rations are needed for the long journey! +A player can choose to Pillage a selected Island card without any additional +cost to gain the Goods presented on the Pillage space. They then put the +Pillaged card on the appropriate discard pile (for Nearby and Distant Islands). +If a player has assigned a Ship with a Raze token, they can choose to Conquer +an Island and add it to their Empire to gain access to its action and/or ability. +Each Conquered Island also provides 1 VP at the end of the game, just like any +other Location. +Once a player has chosen an Island card to either Pillage or Conquer, they +return the used Ship to their supply and discard any Raze token and/or Fish +assigned to that Ship, even if they didn’t use them. Now, if there is another +Ship in the queue, it is their turn to choose an Island card. +NOTE 1: Some cards Feature effects are triggered when a card is Conquered. +This happens when you move the Island card to your Empire, not when you +assign a Ship during a Sail action (see page 10). +NOTE 2: If there are no cards left in the Island deck, shuffle the discarded +cards to make up the new deck. diff --git a/benchmark/perfect_context/Establishment and structure of the European Artificial Intelligence Board.txt b/benchmark/perfect_context/Establishment and structure of the European Artificial Intelligence Board.txt new file mode 100644 index 0000000..c3e0f6e --- /dev/null +++ b/benchmark/perfect_context/Establishment and structure of the European Artificial Intelligence Board.txt @@ -0,0 +1,29 @@ +1. A European Artificial Intelligence Board (the ‘Board’) is hereby established. +2. 
The Board shall be composed of one representative per Member State. The European Data Protection Supervisor shall +participate as observer. The AI Office shall also attend the Board’s meetings, without taking part in the votes. Other national +and Union authorities, bodies or experts may be invited to the meetings by the Board on a case by case basis, where the +issues discussed are of relevance for them. +3. Each representative shall be designated by their Member State for a period of three years, renewable once. +4. Member States shall ensure that their representatives on the Board: +(a) have the relevant competences and powers in their Member State so as to contribute actively to the achievement of the +Board’s tasks referred to in Article 66; +(b) are designated as a single contact point vis-à-vis the Board and, where appropriate, taking into account Member States’ +needs, as a single contact point for stakeholders; +(c) are empowered to facilitate consistency and coordination between national competent authorities in their Member State +as regards the implementation of this Regulation, including through the collection of relevant data and information for +the purpose of fulfilling their tasks on the Board. +5. The designated representatives of the Member States shall adopt the Board’s rules of procedure by a two-thirds +majority. The rules of procedure shall, in particular, lay down procedures for the selection process, the duration of the +mandate of, and specifications of the tasks of, the Chair, detailed arrangements for voting, and the organisation of the +Board’s activities and those of its sub-groups. +6. The Board shall establish two standing sub-groups to provide a platform for cooperation and exchange among market +surveillance authorities and notifying authorities about issues related to market surveillance and notified bodies respectively. 
+The standing sub-group for market surveillance should act as the administrative cooperation group (ADCO) for this +Regulation within the meaning of Article 30 of Regulation (EU) 2019/1020. +The Board may establish other standing or temporary sub-groups as appropriate for the purpose of examining specific +issues. Where appropriate, representatives of the advisory forum referred to in Article 67 may be invited to such sub-groups +or to specific meetings of those subgroups as observers. +7. The Board shall be organised and operated so as to safeguard the objectivity and impartiality of its activities. +8. The Board shall be chaired by one of the representatives of the Member States. The AI Office shall provide the +secretariat for the Board, convene the meetings upon request of the Chair, and prepare the agenda in accordance with the +tasks of the Board pursuant to this Regulation and its rules of procedure. diff --git a/benchmark/perfect_context/Europe.txt b/benchmark/perfect_context/Europe.txt new file mode 100644 index 0000000..bc85f8a --- /dev/null +++ b/benchmark/perfect_context/Europe.txt @@ -0,0 +1,34 @@ +Data on diversity trends about European CS graduates +comes from Informatics Europe. 3 +Informatics, CS, CE, and IT Bachelor’s +Graduates +In the majority of surveyed European nations, there +is a persistent gender disparity among bachelor’s- +level graduates in informatics, computer science, +computer engineering, and information technology. +Despite some narrowing since 2011, men continue to +dominate. For example, France (14.8%), the United +Kingdom (17.8%), and Germany (21.5%) show relatively +low proportions of female graduates in these fields +(Figure 8.1.15). Bulgaria stands out among the surveyed +countries with the highest proportion of female +graduates (35.2%). 
+Informatics, CS, CE, and IT Master’s Graduates +Similar gender disparities are observed among +European informatics, CS, CE, and IT master’s +graduates, with a significantly greater proportion of +males than females in most surveyed countries. As of +2022, Estonia (42.0%), Romania (41.9%), and Bulgaria +(40.4%) reported the greatest proportion of female +master’s graduates (Figure 8.1.16). In contrast, Belgium +(13.7%), Italy (14.1%), and Switzerland (15.8%) reported +the smallest proportion of female master’s graduates. +Informatics, CS, CE, and IT PhD Graduates +In all surveyed European countries, informatics, +CS, CE, and IT PhD graduates are predominantly +male. However, in nations such as the United +Kingdom, Germany, and Switzerland, the gender +gap has narrowed over the last decade, with women +constituting a growing share of PhD graduates (Figure +8.1.17).4 In contrast, countries like Finland and Spain +have seen the gap slightly widen. diff --git a/benchmark/perfect_context/GAME END.txt b/benchmark/perfect_context/GAME END.txt new file mode 100644 index 0000000..d5bd4cd --- /dev/null +++ b/benchmark/perfect_context/GAME END.txt @@ -0,0 +1,20 @@ +The game continues until a player reaches or passes the 25 space +on the scoreboard during the Action phase. Once that happens, +the final round is triggered and the game will end at the end of the +current round. +To calculate the final score, each player should: +> add 1 Victory Point for each card within their Empire to their +current score (including Basic Fields, Field upgrades, and +Conquered Islands), +> add 1 Victory Point for every 2 Resources remaining in their +supply (Resources assigned to cards are not counted towards +this scoring). +> add 1 Victory Point for every 1 Gold remaining in their supply +(Gold tokens assigned to cards are not counted). +The player with the most Victory Points is the winner. +TIES +In case of a tie, the player with the most Locations in their Empire +wins. 
If the players are still tied, the winner is the player with the +most Workers. If still tied, the winner is the player with the most +cards left in their hand. If there is still a tie, the tied players share +the victory! diff --git a/benchmark/perfect_context/HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER?.txt b/benchmark/perfect_context/HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER?.txt new file mode 100644 index 0000000..84b1bd6 --- /dev/null +++ b/benchmark/perfect_context/HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER?.txt @@ -0,0 +1,92 @@ +In order to select an open access publisher, you will +first need to know the range of open access publish- +ers available to you. For authors of articles, a good +place to start is the Directory of Open Access Journals +(“DOAJ”), an online directory that screens and indexes +over 10,000 peer-reviewed open access journals.41 You +may also consult the Open Access Scholarly Publishers +Association, whose membership includes both article +and monograph open access publishers.42 From there, +you should consider a number of factors to determine +which publisher, if any, best suits your needs, +including: +The Impact (Factor) of the Journal +Regardless of whether a journal is restricted or open +access, authors may find it important to consider the +journal’s “impact factor.” Generally, the impact factor +measures the frequency with which the average article +in a journal is cited over a particular period of time. +Many academics, including tenure committees, use this +metric as a proxy for the prestige, quality of scholar- +ship, and competitiveness of a given journal. 
+While impact factor comparisons currently favor +well-established, conventional publishers, alternative +metrics (sometimes referred to as “altmetrics”) have +recently emerged as a way to incorporate new data +sources—such as the number of downloads and page +views, media coverage, or even social media dissemina- +tion—to measure the impact of a journal or of a work in +light of recent technological developments.43 Authors +can use these alternative metrics to complement +citation-based metrics as a signal of the wide and +diverse impact of their works.44 +Authors may also be able to find an open access journal +associated with a prominent conventional publisher in +their field, allowing them to enjoy the benefits of both a +well-respected brand and open access. +Although the development of alternative metrics +is promising, some authors may not want to put +important employment decisions at risk if their insti- +tutions heavily rely on journals’ impact factors. Authors +with a particular concern about impact factors may +alternatively consider publishing with a high- +impact-factor, conventional journal and negotiating to +retain the right to self-archive, as discussed in +Chapter 6. Some conventional publishers also offer +“hybrid” options whereby articles published in a +subscription journal are also made openly accessible, +typically in exchange for a fee. +The Journal’s Reputation for +Responsible Business Practices +Some journals are better than others at editing man- +uscripts, getting issues to press in a timely manner, +and other aspects of providing service to authors and +readers. Before you commit your work to a journal, you +should be familiar with its recent publishing record. If +72 Understanding Open Access +you know other authors who have published there, you +might ask them about their experience. 
+The Open Access Licensing Terms Available +Through the Publisher +A given open access publisher may have only one type +of license that it automatically applies to all the works +it publishes. Thus, authors wishing to fine-tune the +“openness” of their works (see Chapter 4) should +research the licensing policies of the open access +journals in which they are interested. The Directory of +Open Access Journals (“DOAJ”) allows authors to search +by licensing terms for easy comparison. +The Technical Openness of the Publication +Authors interested in making sure that their works are +more technically open should consider the technical +capabilities of different publishers. (See Chapter 4.) +Whether The Publisher Charges Author-Side Fees +and its Policy Regarding Exemptions +As discussed above, some, but not all, open access pub- +lishers charge publication fees. The DOAJ allows users +to filter search results for journals that do not charge +publication fees. +Even if your desired journal charges a fee, you may +not need to pay out-of-pocket. Consider the following +alternate strategies to cover the fee: +• Ask your institution or funding entity if it has +earmarked funds available to pay for open access +publication fees.46 +• Apply for grant funding specifically designated +for open access publishing (for example, from a +government agency, private foundation, or insti- +tutional library).47 +• Partake in the journal’s fee assistance program48 +or institutional discount.49 +• Apply for a publication fee waiver if the journal +offers one. diff --git a/benchmark/perfect_context/LLM Tokenization Introduces Unfairness.txt b/benchmark/perfect_context/LLM Tokenization Introduces Unfairness.txt new file mode 100644 index 0000000..9c13613 --- /dev/null +++ b/benchmark/perfect_context/LLM Tokenization Introduces Unfairness.txt @@ -0,0 +1,34 @@ +Research from the University of Oxford highlights +how inequality in AI originates at the tokenization +stage. 
Tokenization, the process of breaking down +text into smaller units for processing and analysis, +exhibits significant variability across languages. +The number of tokens used for the same sentence +can vary up to 15 times between languages. For +instance, Portuguese closely matches English in the +efficiency of the GPT-4 tokenizer, yet it still requires +approximately 50% more tokens to convey the +same content. The Shan language is the furthest +from English, needing 15 times more tokens. Figure +3.5.9 visualizes the concept of a context window +while figure 3.5.10 illustrates the token consumption +of the same sentence across different languages. +The authors identify three major inequalities that +result from variable tokenization. First, users of +languages that require more tokens than English +for the same content face up to four times higher +inference costs and longer processing times, as +both are dependent on the number of tokens. +Figure 3.5.11 illustrates the variation in token +length and execution time for the same sentence +across different languages or language families. +Second, these users may also experience increased +processing times because models take longer +to process a greater number of tokens. Lastly, +given that models operate within a fixed context +window—a limit on the amount of text or content +that can be input—languages that require more +tokens proportionally use up more of this window. +This can reduce the available context for the model, +potentially diminishing the quality of service for +those users. 
diff --git a/benchmark/perfect_context/LOCATION ABILITIES.txt b/benchmark/perfect_context/LOCATION ABILITIES.txt new file mode 100644 index 0000000..9eb1003 --- /dev/null +++ b/benchmark/perfect_context/LOCATION ABILITIES.txt @@ -0,0 +1,56 @@ +There are four different abilities that players will encounter on the Locations in +the game: +FIELDS +These cards don’t have any keywords, only icons shown in the middle of the ability +description box representing the Goods that may be Harvested from this Location. +Fields give the player the depicted Goods immediately when the Field is built, and +each time a Harvest action is declared on that Field. +IMPORTANT: Some Field Locations work only as upgrades. These Fields have +the Resources on the right side of the ability description box. Once built these +Locations have to be attached to an existing Field in your Empire of the same +Goods type. The card is slid underneath the existing Field with just the additional +Good icons on the right visible. When the Field upgrade is built, the player +gains ONLY the Goods provided by that upgrade card, not the entire Field it is +upgrading. When the player Harvests a Field with one or more upgrades, then +they gain Goods from this Field and all of its upgrades. Upgrade cards give 1 +at the end of the game just like any other Location +FEATURE +These cards can have various special abilities described on the cards. These card +abilities may be triggered during any player’s turn, and in certain cases they can +grant the player Goods after taking specific actions. +INSTANT +These cards’ ability is resolved immediately when they are played, and the card is +discarded afterward. +ACTION +To use the Location’s ability a player has to exhaust the card by rotating it 90 +degrees to the right. They then must pay any cost described in the ability text. +Once the card is exhausted, its ability is no longer available. +IMPORTANT: Only Action Location cards are exhausted after use. 
Both Feature +and Field location cards are not exhausted, even if a player gained something +from them multiple times, unless stated otherwise. +NOTE 1: Several Locations can also +have a Building Bonus which is an +additional one-time ability that +activates when such Location is +placed in a player’s Empire. +NOTE 2: Some abilities in the +game have a ‘/’ divider between +presented choices. This should be +treated as an ‘or.’ A player must +choose one option when using that +ability, for instance ‘Gain 1 / .’ +means that a player chooses either +to gain 1 or 1 +NOTE 3: Some abilities allow players to Pillage +or Conquer right away, before the Expedition +phase, in such cases a Ship is still required to be +on the Expedition board to use it, along with any +appropriate Goods assigned to it. This ability allows +the Ship to resolve the Pillage or Conquest outside +of the Expedition phase and to gain the card/Goods +in the current Action phase. The Ship is returned to +the player’s supply, and any Raze token and/or Fish +assigned to it is discarded as usual, even if it wasn’t +used. A Ship that was returned in this manner may be +used again during this round to Sail (described later +in this section). diff --git a/benchmark/perfect_context/LOOKOUT PHASE.txt b/benchmark/perfect_context/LOOKOUT PHASE.txt new file mode 100644 index 0000000..c756dc7 --- /dev/null +++ b/benchmark/perfect_context/LOOKOUT PHASE.txt @@ -0,0 +1,36 @@ +In this phase each player may decide to acquire up to 4 new cards +and add them to their hand. +To execute the Lookout phase follow these steps: +1. Each player should set aside any cards in their hand for the +time being. +2. Each player draws 4 cards from the top of their Clan deck and +decides which of these cards to keep. +3. For each card that they wish to keep, they have to Spend 1 of +their by putting it on their Clan tile. +4. Any cards they do not want to keep should be placed in the +Clan’s discard pile. +5.
Players retrieve the cards they had set aside. + +Example: +2 cards +placed +in the +discard +pile +2 Workers spent to keep 2 cards + +IMPORTANT: Some cards have a Storage effect that may provide +players with extra Goods if enough of the required Resources are +assigned to them. A player gains those Goods at the end of each +Lookout phase, before starting the Action phase (more about this +effect on page 12). +NOTE 1: There’s no limit to the number of cards a player may have +in their hand. +NOTE 2: If there are no cards left in your Clan deck, shuffle the +discard pile to create a new deck. +> Cards in a player’s hand are kept secret from other players + +No production phase! +A reminder, especially to all the fans of Imperial Settlers, +there is no production phase; players will have to use +Harvest action to gain Goods shown on the Field cards. diff --git a/benchmark/perfect_context/LORA ABSTRACT.txt b/benchmark/perfect_context/LORA ABSTRACT.txt new file mode 100644 index 0000000..316298a --- /dev/null +++ b/benchmark/perfect_context/LORA ABSTRACT.txt @@ -0,0 +1,18 @@ +An important paradigm of natural language processing consists of large-scale pre- +training on general domain data and adaptation to particular tasks or domains. As +we pre-train larger models, full fine-tuning, which retrains all model parameters, +becomes less feasible. Using GPT-3 175B as an example – deploying indepen- +dent instances of fine-tuned models, each with 175B parameters, is prohibitively +expensive. We propose Low-Rank Adaptation, or LoRA, which freezes the pre- +trained model weights and injects trainable rank decomposition matrices into each +layer of the Transformer architecture, greatly reducing the number of trainable pa- +rameters for downstream tasks. Compared to GPT-3 175B fine-tuned with Adam, +LoRA can reduce the number of trainable parameters by 10,000 times and the +GPU memory requirement by 3 times. 
LoRA performs on-par or better than fine- +tuning in model quality on RoBERTa, DeBERTa, GPT-2, and GPT-3, despite hav- +ing fewer trainable parameters, a higher training throughput, and, unlike adapters, +no additional inference latency. We also provide an empirical investigation into +rank-deficiency in language model adaptation, which sheds light on the efficacy of +LoRA. We release a package that facilitates the integration of LoRA with PyTorch +models and provide our implementations and model checkpoints for RoBERTa, +DeBERTa, and GPT-2 at https://github.com/microsoft/LoRA diff --git a/benchmark/perfect_context/OVERCOMING RESERVATIONS ABOUT OPEN ACCESS.txt b/benchmark/perfect_context/OVERCOMING RESERVATIONS ABOUT OPEN ACCESS.txt new file mode 100644 index 0000000..637c51e --- /dev/null +++ b/benchmark/perfect_context/OVERCOMING RESERVATIONS ABOUT OPEN ACCESS.txt @@ -0,0 +1,152 @@ +Some authors who see the potential of open access may +still have reservations about whether open access is +right for them. Some of these reservations are based +on myths about open access and can be resolved by +clearing up misunderstandings. Other reservations +reflect the current limitations of open access options +available to some authors. Fortunately, open access +opportunities are growing as the open access move- +ment spreads through more author communities. +Existing solutions and encouraging developments are +detailed below to address authors’ common reserva- +tions about open access. +Open access is compatible +with peer review and prestige +Peer review, selective submission standards, and other +attributes of prestigious publication are indepen- +dent of the openness of the publication. Some open +access publications apply the highest standards of +quality control, some proprietary publications publish +low-quality works, and vice versa. 
+Introduction 11 +It is true, however, that some new open access +publications do not yet command the same level of +prestige as the best-established, conventional pub- +lications. The prestige of publishing a work with a +leading conventional publisher may dissuade some +authors from publishing with an open access publisher. +This is particularly true of junior faculty whose tenure +prospects may depend on a publication record in top +proprietary outlets. +We expect this will lessen as open access pub- +lishers establish their reputations and proliferate +across disciplines, as existing publishers adopt more +open practices, as more research funders require open +access to the works they fund, and as senior scholars +signal the value of open access. Encouragingly, an +increasing number of open access journals have already +achieved high regard in their disciplines, as described +in Chapter 5. +In the meantime, conventional publication and +open access are not mutually exclusive. For example, +many conventional publishers allow authors who +publish with them to also upload the authors’ final +versions of their works to open access repositories. In +such cases, authors can benefit from the imprint of +a well-established print publisher while still making +their works openly accessible. (For more information, +please see Chapter 7.) +Authors do not always have to pay to +make their works openly accessible +Some authors have reservations about open access +because they think they will need to pay to make +their works openly accessible. This is not always true. +Although some open access publishers do charge a fee +to cover the cost of publishing a work, many authors +make their works openly accessible without incurring +any costs. In fact, the majority of open access journals +charge no author-side fees at all.7 Even where pub- +lishers charge publication fees, there are many ways +that authors can mitigate those costs, as discussed in +Chapter 5. 
Moreover, depositing a work in an institu- +tional open access repository is always free for authors. +Open access options are available +for book authors +The traditional practice in book publishing has been +for authors to work with conventional publishers, +typically assigning their copyrights in exchange for +royalty streams from the sales of their books. Publish- +ers may be reluctant to agree to open access because +they believe that it will undermine book sales. Authors +who depend on royalties likely share this concern. +Moreover, this book publishing convention still works +well for many authors. +However, some authors are excited by the +potential of open access book publishing to increase the +audience for their works. Open access book publishing +options are increasing for these authors. +Many publishers are developing programs to +make books openly accessible.8 For example, the +University of California Press recently launched Luminos, +an open access publishing program for monographs.9 +Authors who publish with Luminos can make digital +editions of their books openly accessible under the Uni- +versity of California Press imprint. Open Humanities Press +has also launched an open access program for mono- +graphs, making the books it publishes in print available +as full-text digital editions published under open +licenses. +10 Additionally, many university presses make +academic books openly available in the Open Access Pub- +lishing in European Networks (“OAPEN”) Library. 14 +Authors can also make their self-published books +openly accessible by uploading electronic versions to +open access repositories or personal websites. Institu- +tions that host repositories will sometimes also offer +book-formatting resources for authors who deposit +book-length works in their repositories.
For example, +eScholarship, the University of California’s institutional +repository, provides authors tools to create digital +versions of their books and also provides University of +California authors print-on-demand services.12 (For +more information on open access repositories, please +see Chapter 5.) +Additionally, book authors who are interested +in open access may choose to negotiate with conven- +tional publishers to publish their books in print but +also retain the rights to openly license their books, +as described in Chapter 7. Authors who have already +assigned their rights to conventional publishers may be +able to exercise or negotiate for rights reversions that +would allow them to make their books openly accessi- +ble. For more on this possibility, please see the +Authors who make their works openly accessible +can require attribution +Some authors are concerned that open access neces- +sarily means others will be allowed to use their works +without giving them credit. This is not true. Although +some authors opt to allow others to use their openly +accessible work without retaining a legal right to insist +on credit, the vast majority of authors select license +terms that require others to give them credit for their +works. (Please see Chapter 4 to learn more about open +access licensing.) Furthermore, even if unattributed +copying of an open access work does not amount to +copyright infringement, it may still amount to plagia- +rism—thus running afoul of longstanding norms within +scholarly and publishing communities. +Authors who make their works openly accessible +can still preserve the integrity of their works +Some authors are concerned that the integrity of their +works will be compromised if they make their works +openly accessible. An author might worry, for example, +that her work will be modified in a way that distorts +its meaning and discredits her. 
However, authors can +use license terms to control how others are allowed +to use their works (subject to some limitations, such +as fair use). Open access licenses often include pro- +visions that protect against misuse, prevent loss of +integrity, and protect author reputation. For example, +Creative Commons licenses require attribution, unless +the author does not want to be attributed; include an +obligation to indicate whether an author’s work has +been modified or not, even if those modifications are +trivial; and require users to link back to the original +if a link is provided. In addition, authors who do not +want to permit others to modify their works can select +license terms that allow free access and distribution of +verbatim copies but not adaptations. More information +on open access licenses can be found in Chapter 4. +Finally, scholarly norms for citation and regarding pla- +giarism are not supplanted when authors openly license +their works. diff --git a/benchmark/perfect_context/OVERVIEW AND GOAL.txt b/benchmark/perfect_context/OVERVIEW AND GOAL.txt new file mode 100644 index 0000000..8234630 --- /dev/null +++ b/benchmark/perfect_context/OVERVIEW AND GOAL.txt @@ -0,0 +1,7 @@ +A game plays over 3 successive chapters that unfold similarly. +On your turn, strengthen your Skills, hoard your treasure, stretch your presence across Middle-earth, +rally Races to your cause, or advance the Quest of the Ring. 
+Immediately win the game by fulfilling one of the 3 victory conditions: +complete the Quest of the Ring, +rally the support of 6 different Races, +conquer Middle-earth diff --git a/benchmark/perfect_context/Penalties.txt b/benchmark/perfect_context/Penalties.txt new file mode 100644 index 0000000..e69de29 diff --git a/benchmark/perfect_context/Prohibited AI Practices.txt b/benchmark/perfect_context/Prohibited AI Practices.txt new file mode 100644 index 0000000..fe11efa --- /dev/null +++ b/benchmark/perfect_context/Prohibited AI Practices.txt @@ -0,0 +1,105 @@ +1. The following AI practices shall be prohibited: +(a) the placing on the market, the putting into service or the use of an AI system that deploys subliminal techniques beyond +a person’s consciousness or purposefully manipulative or deceptive techniques, with the objective, or the effect of +materially distorting the behaviour of a person or a group of persons by appreciably impairing their ability to make an +informed decision, thereby causing them to take a decision that they would not have otherwise taken in a manner that +causes or is reasonably likely to cause that person, another person or group of persons significant harm; +(b) the placing on the market, the putting into service or the use of an AI system that exploits any of the vulnerabilities of +a natural person or a specific group of persons due to their age, disability or a specific social or economic situation, with +the objective, or the effect, of materially distorting the behaviour of that person or a person belonging to that group in +a manner that causes or is reasonably likely to cause that person or another person significant harm; +(c) the placing on the market, the putting into service or the use of AI systems for the evaluation or classification of natural +persons or groups of persons over a certain period of time based on their social behaviour or known, inferred or +predicted personal or personality characteristics, with the 
social score leading to either or both of the following: +(i) detrimental or unfavourable treatment of certain natural persons or groups of persons in social contexts that are +unrelated to the contexts in which the data was originally generated or collected; +(ii) detrimental or unfavourable treatment of certain natural persons or groups of persons that is unjustified or +disproportionate to their social behaviour or its gravity; +(d) the placing on the market, the putting into service for this specific purpose, or the use of an AI system for making risk +assessments of natural persons in order to assess or predict the risk of a natural person committing a criminal offence, +based solely on the profiling of a natural person or on assessing their personality traits and characteristics; this +prohibition shall not apply to AI systems used to support the human assessment of the involvement of a person in +a criminal activity, which is already based on objective and verifiable facts directly linked to a criminal activity; +(e) the placing on the market, the putting into service for this specific purpose, or the use of AI systems that create or +expand facial recognition databases through the untargeted scraping of facial images from the internet or CCTV footage; +(f) the placing on the market, the putting into service for this specific purpose, or the use of AI systems to infer emotions +of a natural person in the areas of workplace and education institutions, except where the use of the AI system is +intended to be put in place or into the market for medical or safety reasons; +OJ L, 12.7.2024 EN +ELI: http://data.europa.eu/eli/reg/2024/1689/oj(g) the placing on the market, the putting into service for this specific purpose, or the use of biometric categorisation +systems that categorise individually natural persons based on their biometric data to deduce or infer their race, political +opinions, trade union membership, religious or philosophical beliefs, sex life or 
sexual orientation; this prohibition does +not cover any labelling or filtering of lawfully acquired biometric datasets, such as images, based on biometric data or +categorizing of biometric data in the area of law enforcement; +(h) the use of ‘real-time’ remote biometric identification systems in publicly accessible spaces for the purposes of law +enforcement, unless and in so far as such use is strictly necessary for one of the following objectives: +(i) the targeted search for specific victims of abduction, trafficking in human beings or sexual exploitation of human +beings, as well as the search for missing persons; +(ii) the prevention of a specific, substantial and imminent threat to the life or physical safety of natural persons or +a genuine and present or genuine and foreseeable threat of a terrorist attack; +(iii) the localisation or identification of a person suspected of having committed a criminal offence, for the purpose of +conducting a criminal investigation or prosecution or executing a criminal penalty for offences referred to in +Annex II and punishable in the Member State concerned by a custodial sentence or a detention order for +a maximum period of at least four years. +Point (h) of the first subparagraph is without prejudice to Article 9 of Regulation (EU) 2016/679 for the processing of +biometric data for purposes other than law enforcement. +2. 
The use of ‘real-time’ remote biometric identification systems in publicly accessible spaces for the purposes of law +enforcement for any of the objectives referred to in paragraph 1, first subparagraph, point (h), shall be deployed for the +purposes set out in that point only to confirm the identity of the specifically targeted individual, and it shall take into +account the following elements: +(a) the nature of the situation giving rise to the possible use, in particular the seriousness, probability and scale of the harm +that would be caused if the system were not used; +(b) the consequences of the use of the system for the rights and freedoms of all persons concerned, in particular the +seriousness, probability and scale of those consequences. +In addition, the use of ‘real-time’ remote biometric identification systems in publicly accessible spaces for the purposes of +law enforcement for any of the objectives referred to in paragraph 1, first subparagraph, point (h), of this Article shall +comply with necessary and proportionate safeguards and conditions in relation to the use in accordance with the national +law authorising the use thereof, in particular as regards the temporal, geographic and personal limitations. The use of the +‘real-time’ remote biometric identification system in publicly accessible spaces shall be authorised only if the law +enforcement authority has completed a fundamental rights impact assessment as provided for in Article 27 and has +registered the system in the EU database according to Article 49. However, in duly justified cases of urgency, the use of such +systems may be commenced without the registration in the EU database, provided that such registration is completed +without undue delay. +3. 
For the purposes of paragraph 1, first subparagraph, point (h) and paragraph 2, each use for the purposes of law +enforcement of a ‘real-time’ remote biometric identification system in publicly accessible spaces shall be subject to a prior +authorisation granted by a judicial authority or an independent administrative authority whose decision is binding of the +Member State in which the use is to take place, issued upon a reasoned request and in accordance with the detailed rules of +national law referred to in paragraph 5. However, in a duly justified situation of urgency, the use of such system may be +commenced without an authorisation provided that such authorisation is requested without undue delay, at the latest +within 24 hours. If such authorisation is rejected, the use shall be stopped with immediate effect and all the data, as well as +the results and outputs of that use shall be immediately discarded and deleted. +The competent judicial authority or an independent administrative authority whose decision is binding shall grant the +authorisation only where it is satisfied, on the basis of objective evidence or clear indications presented to it, that the use of +the ‘real-time’ remote biometric identification system concerned is necessary for, and proportionate to, achieving one of the +EN OJ L, 12.7.2024 +52/144objectives specified in paragraph 1, first subparagraph, point (h), as identified in the request and, in particular, remains +limited to what is strictly necessary concerning the period of time as well as the geographic and personal scope. In deciding +on the request, that authority shall take into account the elements referred to in paragraph 2. No decision that produces an +adverse legal effect on a person may be taken based solely on the output of the ‘real-time’ remote biometric identification +system. +4. 
Without prejudice to paragraph 3, each use of a ‘real-time’ remote biometric identification system in publicly +accessible spaces for law enforcement purposes shall be notified to the relevant market surveillance authority and the +national data protection authority in accordance with the national rules referred to in paragraph 5. The notification shall, as +a minimum, contain the information specified under paragraph 6 and shall not include sensitive operational data. +5. A Member State may decide to provide for the possibility to fully or partially authorise the use of ‘real-time’ remote +biometric identification systems in publicly accessible spaces for the purposes of law enforcement within the limits and +under the conditions listed in paragraph 1, first subparagraph, point (h), and paragraphs 2 and 3. Member States concerned +shall lay down in their national law the necessary detailed rules for the request, issuance and exercise of, as well as +supervision and reporting relating to, the authorisations referred to in paragraph 3. Those rules shall also specify in respect +of which of the objectives listed in paragraph 1, first subparagraph, point (h), including which of the criminal offences +referred to in point (h)(iii) thereof, the competent authorities may be authorised to use those systems for the purposes of +law enforcement. Member States shall notify those rules to the Commission at the latest 30 days following the adoption +thereof. Member States may introduce, in accordance with Union law, more restrictive laws on the use of remote biometric +identification systems. +6. National market surveillance authorities and the national data protection authorities of Member States that have been +notified of the use of ‘real-time’ remote biometric identification systems in publicly accessible spaces for law enforcement +purposes pursuant to paragraph 4 shall submit to the Commission annual reports on such use. 
For that purpose, the +Commission shall provide Member States and national market surveillance and data protection authorities with a template, +including information on the number of the decisions taken by competent judicial authorities or an independent +administrative authority whose decision is binding upon requests for authorisations in accordance with paragraph 3 and +their result. +7. The Commission shall publish annual reports on the use of real-time remote biometric identification systems in +publicly accessible spaces for law enforcement purposes, based on aggregated data in Member States on the basis of the +annual reports referred to in paragraph 6. Those annual reports shall not include sensitive operational data of the related +law enforcement activities. +8. This Article shall not affect the prohibitions that apply where an AI practice infringes other Union law. diff --git a/benchmark/perfect_context/RAID.txt b/benchmark/perfect_context/RAID.txt new file mode 100644 index 0000000..1c8febd --- /dev/null +++ b/benchmark/perfect_context/RAID.txt @@ -0,0 +1,12 @@ +This action allows a player to Raid an opponent’s Action Location +card to immediately exhaust it so that the opponent cannot use its +ability until unexhausted. +A player can only use this action if he has a Raze token. Only Action +Locations may be Raided. Remember that you cannot Raid opponent +that has already passed +TO RAID AN OPPONENT’S LOCATION: +> Choose an opponent and any one of their unexhausted Action +Locations. +> Discard 1 token. +> Exhaust the opponent’s Location (Rotate it 90 degrees to +the right). diff --git a/benchmark/perfect_context/Reporting of serious incidents.txt b/benchmark/perfect_context/Reporting of serious incidents.txt new file mode 100644 index 0000000..74bc4c6 --- /dev/null +++ b/benchmark/perfect_context/Reporting of serious incidents.txt @@ -0,0 +1,40 @@ +1. 
Providers of high-risk AI systems placed on the Union market shall report any serious incident to the market +surveillance authorities of the Member States where that incident occurred. +OJ L, 12.7.2024 EN +ELI: http://data.europa.eu/eli/reg/2024/1689/oj 101/144 +2. The report referred to in paragraph 1 shall be made immediately after the provider has established a causal link +between the AI system and the serious incident or the reasonable likelihood of such a link, and, in any event, not later than +15 days after the provider or, where applicable, the deployer, becomes aware of the serious incident. +The period for the reporting referred to in the first subparagraph shall take account of the severity of the serious incident. +3. Notwithstanding paragraph 2 of this Article, in the event of a widespread infringement or a serious incident as +defined in Article 3, point (49)(b), the report referred to in paragraph 1 of this Article shall be provided immediately, and +not later than two days after the provider or, where applicable, the deployer becomes aware of that incident. +4. Notwithstanding paragraph 2, in the event of the death of a person, the report shall be provided immediately after the +provider or the deployer has established, or as soon as it suspects, a causal relationship between the high-risk AI system and +the serious incident, but not later than 10 days after the date on which the provider or, where applicable, the deployer +becomes aware of the serious incident. +5. Where necessary to ensure timely reporting, the provider or, where applicable, the deployer, may submit an initial +report that is incomplete, followed by a complete report. +6. Following the reporting of a serious incident pursuant to paragraph 1, the provider shall, without delay, perform the +necessary investigations in relation to the serious incident and the AI system concerned. This shall include a risk assessment +of the incident, and corrective action. 
+The provider shall cooperate with the competent authorities, and where relevant with the notified body concerned, during +the investigations referred to in the first subparagraph, and shall not perform any investigation which involves altering the +AI system concerned in a way which may affect any subsequent evaluation of the causes of the incident, prior to informing +the competent authorities of such action. +7. Upon receiving a notification related to a serious incident referred to in Article 3, point (49)(c), the relevant market +surveillance authority shall inform the national public authorities or bodies referred to in Article 77(1). The Commission +shall develop dedicated guidance to facilitate compliance with the obligations set out in paragraph 1 of this Article. That +guidance shall be issued by 2 August 2025, and shall be assessed regularly. +8. The market surveillance authority shall take appropriate measures, as provided for in Article 19 of Regulation (EU) +2019/1020, within seven days from the date it received the notification referred to in paragraph 1 of this Article, and shall +follow the notification procedures as provided in that Regulation. +9. For high-risk AI systems referred to in Annex III that are placed on the market or put into service by providers that are +subject to Union legislative instruments laying down reporting obligations equivalent to those set out in this Regulation, the +notification of serious incidents shall be limited to those referred to in Article 3, point (49)(c). +10. For high-risk AI systems which are safety components of devices, or are themselves devices, covered by Regulations +(EU) 2017/745 and (EU) 2017/746, the notification of serious incidents shall be limited to those referred to in Article 3, +point (49)(c) of this Regulation, and shall be made to the national competent authority chosen for that purpose by the +Member States where the incident occurred. +11. 
National competent authorities shall immediately notify the Commission of any serious incident, whether or not +they have taken action on it, in accordance with Article 20 of Regulation (EU) 2019/1020 diff --git a/benchmark/perfect_context/Risk Perception.txt b/benchmark/perfect_context/Risk Perception.txt new file mode 100644 index 0000000..7b3eb5d --- /dev/null +++ b/benchmark/perfect_context/Risk Perception.txt @@ -0,0 +1,29 @@ +In collaboration with Accenture, this year a team of +Stanford researchers ran a global survey with respondents +from more than 1,000 organizations to assess the global +state of responsible AI. The organizations, with total +revenues of at least $500 million each, were taken +from 20 countries and 19 industries and responded in +February–March 2024.3 The objective of the Global State +of Responsible AI survey was to gain an understanding of +the challenges of adopting responsible AI practices and to +allow for a comparison of responsible AI activities across +10 dimensions and across surveyed industries and regions. +Respondents were asked which risks were relevant to +them, given their AI adoption strategy; i.e., depending +on whether they develop, deploy, or use generative or +nongenerative AI. They were presented with a list +of 14 risks and could select all that apply to them, +given their AI adoption strategies.4 The researchers +found that privacy and data governance risks, e.g., +the use of data without the owner’s consent or data +leaks, are the leading concerns across the globe. +Notably, they observe that these concerns are +significantly higher in Asia and Europe compared to +North America. Fairness risks were only selected by +20% of North American respondents, significantly +less than respondents in Asia (31%) and Europe +(34%) (Figure 3.1.5). Respondents in Asia selected, +on average, the highest number of relevant risks +(4.99), while Latin American respondents selected, +on average, the fewest (3.64). 
diff --git a/benchmark/perfect_context/SECTION I. INTRODUCTION.txt b/benchmark/perfect_context/SECTION I. INTRODUCTION.txt new file mode 100644 index 0000000..83f5ebb --- /dev/null +++ b/benchmark/perfect_context/SECTION I. INTRODUCTION.txt @@ -0,0 +1,65 @@ +UNTIL VERY RECENTLY, AUTHORS WHO +wanted their works to be widely available had little +choice but to submit their works to publishers who +took assignments of the authors’ copyrights and +exercised them according to a proprietary “all rights +reserved” model.1 The advent of global digital networks +now provides authors who write to be read with excit- +ing new options for communicating their ideas broadly. +One of these options is open access. +The basic idea of open access is that it makes +copyrightable works available without all of the access +barriers associated with the “all rights reserved” +model. These can take the form of price barriers and +permission barriers. 2 Open access typically comes in +two forms. What has come to be known as gratis open +access is the practice of making a work available online +free of charge (also called public access). The term +libre open access (also called full open access) refers to +the practice of making a work available online free of +charge and with some additional reuse rights, typically +granted through a Creative Commons license. Gratis +open access removes price barriers, whereas libre open +access additionally removes at least some permission +barriers, allowing users to copy, redistribute, and/or +adapt a work. Open access contrasts with more tradi- +tional models of restricted-access publishing in which +copies of works are made directly available only to +paying customers. +Authors who are interested in increasing access to +their works may want to understand whether elimi- +nating cost and permissions barriers is a good option +for them and, if so, how they might release their +works under open access terms.
Other authors may +be required by their employer or funding agency to +comply with an open access policy. Still other authors +may be skeptical about whether open access is compat- +ible with their publication goals—including rigorous +peer review, prestige, or monetary compensation—and +want to learn more. +A note on terminology: Many open access proponents and some +research funders +3 do not consider a work truly openly accessible +if it only meets gratis open access requirements. Indeed, only +libre open access is compliant with most major international +statements that define open access.4 For readability, we use the +term open access in this guide to describe the practice of making +a work available to readers free of charge on the Internet, regard- +less of whether subsequent reuse is permitted. The distinction is +important, however, and we try to make clear in our discussion +below whether we are referring to removal of only price, or both +price and permission barriers. Another way to think about open +access is along a continuum that considers variables including +both price and permissions barriers. If you would like to learn +more about the spectrum of open access, we recommend the +guide How Open Is It?. +Authors Alliance is a nonprofit organization that +promotes authorship for the public good by supporting +authors who write to be read.6 Pursuant to this mission, +Authors Alliance created this guide to help authors +understand and evaluate opportunities to make their +works openly accessible. In this way, Authors Alliance +seeks to help authors further their interest in +disseminating knowledge and products of the +imagination broadly and to enhance the public’s +access to and reuse of these works. 
diff --git a/benchmark/perfect_context/Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes.txt b/benchmark/perfect_context/Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes.txt new file mode 100644 index 0000000..e69de29 diff --git a/benchmark/perfect_context/Training Cost.txt b/benchmark/perfect_context/Training Cost.txt new file mode 100644 index 0000000..2532878 --- /dev/null +++ b/benchmark/perfect_context/Training Cost.txt @@ -0,0 +1,44 @@ +A prominent topic in discussions about foundation +models is their speculated costs. While AI +companies seldom reveal the expenses involved +in training their models, it is widely believed that +these costs run into millions of dollars and are +rising. For instance, OpenAI’s CEO, Sam Altman, +mentioned that the training cost for GPT-4 was over +$100 million. This escalation in training expenses +has effectively excluded universities, traditionally +centers of AI research, from developing their own +leading-edge foundation models. In response, policy +initiatives, such as President Biden’s Executive Order +on AI, have sought to level the playing field between +industry and academia by creating a National AI +Research Resource, which would grant nonindustry +actors the compute and data needed to do higher +level AI-research. +Understanding the cost of training AI models is +important, yet detailed information on these costs +remains scarce. The AI Index was among the first to +offer estimates on the training costs of foundation +models in last year’s publication. 
This year, the AI +Index has collaborated with Epoch AI, an AI research +institute, to substantially enhance and solidify the +robustness of its AI training cost estimates.9 To +estimate the cost of cutting-edge models, the Epoch +team analyzed training duration, as well as the type, +quantity, and utilization rate of the training hardware, +using information from publications, press releases, or +technical reports related to the models.10 +Figure 1.3.21 visualizes the estimated training cost +associated with select AI models, based on cloud +compute rental prices. AI Index estimates validate +suspicions that in recent years model training costs +have significantly increased. For example, in 2017, +the original Transformer model, which introduced the +architecture that underpins virtually every modern +LLM, cost around $900 to train.11 RoBERTa Large, +released in 2019, which achieved state-of-the-art +results on many canonical comprehension benchmarks +like SQuAD and GLUE, cost around $160,000 to train. +Fast-forward to 2023, and training costs for OpenAI’s +GPT-4 and Google’s Gemini Ultra are estimated to be +around $78 million and $191 million, respectively. diff --git a/benchmark/perfect_context/Transparency obligations for providers and deployers of certain AI systems.txt b/benchmark/perfect_context/Transparency obligations for providers and deployers of certain AI systems.txt new file mode 100644 index 0000000..9ca892f --- /dev/null +++ b/benchmark/perfect_context/Transparency obligations for providers and deployers of certain AI systems.txt @@ -0,0 +1,44 @@ +1. 
Providers shall ensure that AI systems intended to interact directly with natural persons are designed and developed in +such a way that the natural persons concerned are informed that they are interacting with an AI system, unless this is +obvious from the point of view of a natural person who is reasonably well-informed, observant and circumspect, taking +into account the circumstances and the context of use. This obligation shall not apply to AI systems authorised by law to +detect, prevent, investigate or prosecute criminal offences, subject to appropriate safeguards for the rights and freedoms of +third parties, unless those systems are available for the public to report a criminal offence. +2. Providers of AI systems, including general-purpose AI systems, generating synthetic audio, image, video or text +content, shall ensure that the outputs of the AI system are marked in a machine-readable format and detectable as +artificially generated or manipulated. Providers shall ensure their technical solutions are effective, interoperable, robust and +reliable as far as this is technically feasible, taking into account the specificities and limitations of various types of content, +the costs of implementation and the generally acknowledged state of the art, as may be reflected in relevant technical +standards. This obligation shall not apply to the extent the AI systems perform an assistive function for standard editing or +do not substantially alter the input data provided by the deployer or the semantics thereof, or where authorised by law to +detect, prevent, investigate or prosecute criminal offences. +3. Deployers of an emotion recognition system or a biometric categorisation system shall inform the natural persons +exposed thereto of the operation of the system, and shall process the personal data in accordance with Regulations (EU) +2016/679 and (EU) 2018/1725 and Directive (EU) 2016/680, as applicable. 
This obligation shall not apply to AI systems +used for biometric categorisation and emotion recognition, which are permitted by law to detect, prevent or investigate +criminal offences, subject to appropriate safeguards for the rights and freedoms of third parties, and in accordance with +Union law. +4. Deployers of an AI system that generates or manipulates image, audio or video content constituting a deep fake, shall +disclose that the content has been artificially generated or manipulated. This obligation shall not apply where the use is +authorised by law to detect, prevent, investigate or prosecute criminal offence. Where the content forms part of an evidently +artistic, creative, satirical, fictional or analogous work or programme, the transparency obligations set out in this paragraph +are limited to disclosure of the existence of such generated or manipulated content in an appropriate manner that does not +hamper the display or enjoyment of the work. +Deployers of an AI system that generates or manipulates text which is published with the purpose of informing the public +on matters of public interest shall disclose that the text has been artificially generated or manipulated. This obligation shall +not apply where the use is authorised by law to detect, prevent, investigate or prosecute criminal offences or where the +AI-generated content has undergone a process of human review or editorial control and where a natural or legal person +holds editorial responsibility for the publication of the content. +EN OJ L, 12.7.2024 +82/144 ELI: http://data.europa.eu/eli/reg/2024/1689/oj +5. The information referred to in paragraphs 1 to 4 shall be provided to the natural persons concerned in a clear and +distinguishable manner at the latest at the time of the first interaction or exposure. The information shall conform to the +applicable accessibility requirements. +6. 
Paragraphs 1 to 4 shall not affect the requirements and obligations set out in Chapter III, and shall be without +prejudice to other transparency obligations laid down in Union or national law for deployers of AI systems. +7. The AI Office shall encourage and facilitate the drawing up of codes of practice at Union level to facilitate the effective +implementation of the obligations regarding the detection and labelling of artificially generated or manipulated content. +The Commission may adopt implementing acts to approve those codes of practice in accordance with the procedure laid +down in Article 56 (6). If it deems the code is not adequate, the Commission may adopt an implementing act specifying +common rules for the implementation of those obligations in accordance with the examination procedure laid down in +Article 98(2). diff --git a/benchmark/perfect_context/U.S. Regulation.txt b/benchmark/perfect_context/U.S. Regulation.txt new file mode 100644 index 0000000..0e6c02a --- /dev/null +++ b/benchmark/perfect_context/U.S. Regulation.txt @@ -0,0 +1,71 @@ +This section examines AI-related regulations enacted +by American regulatory agencies between 2016 and +2023. It provides an analysis of the total number of +regulations, as well as their topics, scope, regulatory +intent, and originating agencies. To compile this +data, the AI Index team performed a keyword search +for “artificial intelligence” on the Federal Register, a +comprehensive repository of government documents +from nearly all branches of the American government, +encompassing more than 436 agencies.8 +Overview +The number of AI-related regulations has risen +significantly, both in the past year and over the last five +years (Figure 7.4.1). In 2023, there were 25 AI-related +regulations, a stark increase from just one in 2016. Last +year alone, the total number of AI-related regulations +grew by 56.3%. 
+By Relevance +The AI Index categorized AI-related regulations— +those mentioning AI—into three levels of relevance: +low, medium, and high.9 In 2023, the number of +high and medium relevance AI-related regulations +increased compared to 2022. For instance, a high +relevance AI regulation was the Copyright Office +and Library of Congress’ Copyright Registration +Guidance: Works Containing Material Generated by +Artificial Intelligence. This policy statement clarified +registration practices for works incorporating AI- +generated material. Meanwhile, a medium-relevance +example is the Securities and Exchange Commission’s +Cybersecurity Risk Management Strategy, Governance, +and Incident Disclosure, which established +standardized disclosure practices for public companies +concerning cybersecurity risk management, strategy, +governance, and incidents. +Figure 7.4.2 categorizes AI-related regulations in the +United States based on their relevance to AI. A growing +proportion of these regulations is highly relevant to +AI. Among the 25 AI-related regulations enacted in +2023, four were identified as being highly relevant, the +greatest amount since tracking began in 2016. +By Agency10 +Which agencies are the primary sources of AI +regulations? In 2023, the Executive Office of the +President and the Commerce Department led with +five AI-related regulations each, followed by the +Health and Human Services Department and the +Industry and Security Bureau, with each issuing four +(Figure 7.4.3). Furthermore, the number of agencies +issuing AI regulations increased from 17 in 2022 to 21 in +2023, indicating a growing need for clarity and concern +regarding AI among a broader array of American +regulatory bodies. +By Approach +The AI Index categorized regulations based on their +approach: whether they expanded or restricted AI +capabilities.11 Over time, the trend in AI regulations +in the United States has shifted significantly toward +restriction (Figure 7.4.4). 
In 2023, there were 10 +restrictive AI regulations compared to just three that +were expansive. Conversely in 2020, there were four +regulations that were expansive and one that was +restrictive. +By Subject Matter +In 2023, American AI regulations were categorized by +primary subject matter. The most prevalent subject +matter in AI-related regulation was foreign trade and +international finance, with three instances. Three +topics tied for second place, with two occurrences +each: health; commerce; and science, technology, and +communications (Figure 7.4.5). diff --git a/benchmark/perfect_context/WHY ARE OPEN ACCESS POLICIES ADOPTED?.txt b/benchmark/perfect_context/WHY ARE OPEN ACCESS POLICIES ADOPTED?.txt new file mode 100644 index 0000000..b421446 --- /dev/null +++ b/benchmark/perfect_context/WHY ARE OPEN ACCESS POLICIES ADOPTED?.txt @@ -0,0 +1,33 @@ +Open access policies maximize the value of investment +in research by ensuring that more readers can access +research results and scholarship than if the works were +available through restricted means alone. +Universities, for example, further their educa- +tional missions by implementing open access policies +that make scholarly works more widely available. Some +faculty members have banded together at their respec- +tive institutions to express their collective commit- +ment to open access, resulting in a growing number of +university open access policies in recent years. Under +Why Make Your Work Openly Accessible? 33 +such policies, faculty members typically grant to their +universities the right to deposit faculty-authored works +in institutional repositories. +Under similar policies, government agencies +require grant recipients to deposit their research +findings in open access repositories where they are +available for free public access. The National Institutes of +Health (“NIH”) Public Access Policy is one such policy. 
21 +The number of federal open access policies is growing, +largely because the Obama administration issued a +policy directive in 2013 designed to increase public +access to the results of federally funded research.22 +Under the policy, many federal agencies are required to +develop plans to make the published results of federally +funded research freely available to the public. +Foundations that sponsor research are also +increasingly adopting open access policies. For +example, the Bill and Melinda Gates Foundation imple- +mented an open access policy in 2015 that requires +research resulting from the Foundation’s funding to be +made available under libre open access terms. diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 29944d1..b720c48 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -1,54 +1,53 @@ document,section,question,answer https://arxiv.org/pdf/1706.03762,3 Model Architecture,"What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder",C -https://arxiv.org/pdf/1706.03762,3 Model Architecture,"How many layers compose the encoder?",6 -https://arxiv.org/pdf/1706.03762,3 Model Architecture,"How many layers compose the decoder?",6 -https://arxiv.org/pdf/1706.03762,3 Model Architecture,"How many parallel attention heads are used?",8 -https://arxiv.org/pdf/1706.03762,3 Model Architecture,"Does the final model use learned embeddings for the input and output tokens?",YES -https://arxiv.org/pdf/1706.03762,3 Model Architecture,"Does the final model use learned positional embeddings?",NO -https://arxiv.org/pdf/1706.03762,5 Training,"How many GPUs were used for training?",8 -https://arxiv.org/pdf/1706.03762,5 Training,"What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4",B -https://arxiv.org/pdf/1706.03762,5 Training,"What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD",B -https://arxiv.org/pdf/1706.03762,5 Training,"How many warmup steps were used?",4000 -https://arxiv.org/pdf/1706.03762,5 Training,"What was the dropout rate used for the base model?",0.1 -https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,"How many layers are in the toy model (y = x^2)?",3 -https://arxiv.org/pdf/2210.05189,2.1 Fully Connected Networks,"Does the model use Sigmoid activation function?",NO -https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many parameters are in the y = x^2 toy model tree?",14 +https://arxiv.org/pdf/1706.03762,3.1 Encoder and Decoder Stacks,"How many layers compose the encoder?",6 +https://arxiv.org/pdf/1706.03762,3.1 Encoder and Decoder Stacks,"How many layers compose the decoder?",6 +https://arxiv.org/pdf/1706.03762,3.2.2 Multi-Head Attention,"How many parallel attention heads are used?",8 +https://arxiv.org/pdf/1706.03762,3.4 Embeddings and Softmax,"Does the final model use learned embeddings for the input and output tokens?",YES +https://arxiv.org/pdf/1706.03762,3.5 Positional Encoding,"Does the final model use learned positional embeddings?",NO +https://arxiv.org/pdf/1706.03762,5.2 Hardware and Schedule,"How many GPUs were used for training?",8 +https://arxiv.org/pdf/1706.03762,5.2 Hardware and Schedule,"What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4",B +https://arxiv.org/pdf/1706.03762,5.3 Optimizer,"What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD",B +https://arxiv.org/pdf/1706.03762,5.3 Optimizer,"How many warmup steps were used?",4000 +https://arxiv.org/pdf/1706.03762,5.4 Regularization,"What was the dropout rate used for the base model?",0.1 https://arxiv.org/pdf/2210.05189,2.4 Recurrent Networks,"Can recurrent networks also be converted to decision trees?",YES +https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many layers are in the toy model (y = x^2)?",3 +https://arxiv.org/pdf/2210.05189,3 Experimental Results,"Does the toy model (y = x^2) use Sigmoid activation function?",NO +https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many parameters are in the toy model (y = x^2) tree?",14 https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many layers are in the half-moon neural network?",3 https://arxiv.org/pdf/2210.05189,3 Experimental Results,"What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy",B -https://arxiv.org/pdf/2106.09685v2.pdf,4 Our Method,Does LoRA work with any neural network containing dense layers?,YES -https://arxiv.org/pdf/2106.09685v2.pdf,5.5 Scaling Up to GPT-3,"How much memory is saved (in GB) when training GPT-3 175B with LoRA compared to full fine-tuning?",850 -https://arxiv.org/pdf/2106.09685v2.pdf,Abstract,"By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x",C -https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,"In billions, how many trainable parameters does GPT-3 have?",175 -https://arxiv.org/pdf/2106.09685v2.pdf,1. Introduction,Does LoRA introduce additional inference latency compared to full fine-tuning?,NO -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Prohibited AI Practices (Article 5),"Which type of AI systems are banned by the AI Act? 
-A: High-risk systems, -B: Manipulative systems, -C: Real-time biometric systems in public spaces",C -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Requirements for High-Risk AI Systems (Article 10),"what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size",B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Classification rules (article 51),"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25",C -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,"TRANSPARENCY OBLIGATIONS FOR PROVIDERS AND DEPLOYERS OF CERTAIN AI SYSTEMS(Article 50)","What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.",B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Sharing of information on serious incidents (article 73),How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes (Article 60),"What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. 
-C: 12 months",B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Penalties (Article 99),"What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher",A -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Code of practice (article 56),By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025,A -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,"Compliant AI systems which present a risk (article 82)",What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? -A: 1 month -B: 2 months -C: Immediately,C -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,EU declaration of conformity (article 47),"How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years",B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Establishment and structure of the European Artificial Intelligence Board (article 65),"How long is the term of office for a Member State representative on the European Artificial Intelligence Board? -A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once",B -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,"According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license",B -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,"how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000",A -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 2 Benefits of Open Access,what is the term of office for members of the advisory board of the Authors Alliance? -A: The source does not specify a term of office for the advisory board. -B: 2 years -C: 4 years,A -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Introduction,Does open access eliminate price barriers?,YES -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chatper 1 What is this guide and who is it for,Are publication fees required for all open access journals?,NO -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 3 Open Access Policies,In what year did the Bill and Melinda Gates foundation implement an open access policy?,2015 -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,Chapter 5 Where do you want to make your work available?,Are Gold Open Access and Green Open Access mutually exclusive.,NO +https://arxiv.org/pdf/2106.09685.pdf,4 OUR METHOD,Does LoRA work with any neural network containing dense layers?,YES +https://arxiv.org/pdf/2106.09685.pdf,5.5 SCALING UP TO GPT-3 175B,"How much memory is saved (in GB) when training GPT-3 175B with LoRA compared to full fine-tuning?",850 
+https://arxiv.org/pdf/2106.09685.pdf,LORA ABSTRACT,"By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x",C +https://arxiv.org/pdf/2106.09685.pdf,1 INTRODUCTION,"In billions, how many trainable parameters does GPT-3 have?",175 +https://arxiv.org/pdf/2106.09685.pdf,1 INTRODUCTION,Does LoRA introduce additional inference latency compared to full fine-tuning?,NO +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Data and data governance,"what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size",B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Classification of general-purpose AI models as general-purpose AI models with systemic risk,"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25",C +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Transparency obligations for providers and deployers of certain AI systems,"What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.",B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Reporting of serious incidents,How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days,B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes,"What is the maximum duration of testing in real-world conditions? 
-A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months",B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Penalties,"What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher",A +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Codes of practice,By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025,A +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Compliant AI systems which present a risk,What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? -A: 1 month -B: 2 months -C: Immediately,C +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,EU declaration of conformity,"How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years -C: 15 years",B +https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Establishment and structure of the European Artificial Intelligence Board,"How long is the term of office for a Member State representative on the European Artificial Intelligence Board? -A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once",B +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,SECTION I. INTRODUCTION,"According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license",B +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER?,"how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000",A +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,SECTION I. INTRODUCTION,Does open access eliminate price barriers?,YES +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,OVERCOMING RESERVATIONS ABOUT OPEN ACCESS,Are publication fees required for all open access journals?,NO +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,WHY ARE OPEN ACCESS POLICIES ADOPTED?,In what year did the Bill and Melinda Gates foundation implement an open access policy?,2015 +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK AVAILABLE?,Are Gold Open Access and Green Open Access mutually exclusive.,NO https://arxiv.org/pdf/2201.11903,3 Arithmetic Reasoning,Is Arithmetic reasoning is a task that language models often find very easy?,NO https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many large language models were evaluated?,5 https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many benchmarks were used to evaluate arithmetic reasoning?,5 https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,Is symbolic reasoning usually simple for humans but challenging for language models?,YES +https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4,B +https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles",A https://arxiv.org/pdf/2201.11903,3.4 Robustness of Chain of Thought,How many annotators provided independent chains of thought?,3 https://arxiv.org/pdf/2201.11903,3.2 Results,How many random samples for examined to understand model errors?,50 -https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles",A https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,OVERVIEW AND GOAL,How many chapters does the game last?,3 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,OVERVIEW AND GOAL,How many victory conditions are there?,3 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,OVERVIEW AND GOALEND OF THE GAME,How many different races are there?,6 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,How many different races are there?,6 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other,A https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,Can you take a Chapter card and a Landmark tile on your same turn?,NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,How many goins does a player take when discarding a card during Chapter 3?,3 @@ -64,14 +63,39 @@ https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_R https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOOKOUT PHASE,What is the maximum number of cards a player may acquire during the lookout phase?,4 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOOKOUT PHASE,Is there a limit to the number of cards a player may have in their hand?,NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,ACTION PHASE,"Can you raid the locations of a player that has passed during the action phase?",NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,ACTION PHASE,How many points in the scoreboard must be reached during the Action phase to trigger the final round?,25 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,EXPEDITION PHASE,Can players conquer and pillage the same island during the expedition phase?,NO -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,ACTION PHASEGAME END,How many points in the scoreboard must be reached during the Action phase to trigger the final round?,25 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,EXPEDITION PHASE,Do you need a fish to conquer a distant island?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,EXPEDITION PHASE,How many victory points you get from each conquered island?,1 
https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,CLEANUP PHASE,Is there a cleanup phase in the final round?,NO -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,BUILD A LOCATION,How many victory points are granted by a built Field Location card that work as an upgrade?,1 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,ACTIONS,Can you use the raid action without a Raze token?,NO -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,GAME END,Can the game end in a tie?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOCATION ABILITIES,How many victory points are granted by a built Field Location card that works as an upgrade?,1 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOCATION ABILITIES,Do you need a ship to be on the expedition board to use a card that allows you to pillage or conquer right away?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,RAID,Can you use the raid action without a Raze token?,NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,GAME END,Can the game end in a tie?,YES -https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,4.4 Toilets,"Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold",B +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,GAME END,"If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie",A +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,2.1. Toilets,"Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold",B https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,CARBON MONOXIDE DETECTION AND VENTING,"In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground",C -https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,Natural lighting,"What is the daylight factor required for façades with exterior obstructions?",0.7 -https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,Internal partitions and doors,"What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90",A +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,4.1. Natural lighting,"What percentage is the daylight factor required for façades with exterior obstructions?",0.7 +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,1.2.1. Internal partitions and doors,"What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90",A +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,5.2. Thread Hierarchy,"What is the maximum number of threads within a thread block?",1024 +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,5.2. 
Thread Hierarchy,"Can you identify a thread with a four-dimensional index?",NO +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,6.1.1. Compilation Workflow,"In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.",B +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,6.1.1. Compilation Workflow,"What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.",B +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,6.1.1. Compilation Workflow,"What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.",C +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,6.1.1. Compilation Workflow,"What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.",A +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.",C +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. 
-B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.",A +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.",B +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.",C +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,23.1. What is Lazy Loading?,"What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.",A +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,23.1. What is Lazy Loading?,"Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?",NO +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Risk Perception,"Which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.",B +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Risk Perception,"In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.",C +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Training Cost,"What is a major consequence of the rising training costs for foundation models? 
-A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.",A +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Training Cost,"How did the AI Index and Epoch AI estimate training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.",C +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,LLM Tokenization Introduces Unfairness,"What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.",A +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,LLM Tokenization Introduces Unfairness,"What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.",B +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,U.S. Regulation,"How many AI-related regulations were enacted in the United States in 2023?",25 +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,U.S. Regulation,"Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. 
-B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance",B +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Europe,"Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom",B +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Europe,"Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.",C diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 8de335b..37f2dae 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -76,7 +76,7 @@ def find_retrieve_answer( logger.error(f"Failed to generate completion: {e}") return "Generation Error", sections_checked - logger.debug(f"Result: {response}") + logger.info(f"Result: {response}") if finding_section: response = response.strip() From a7d8dc55f1b8e0d78c21d48984d72bfbda8a1413 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 27 Jan 2025 16:35:24 +0100 Subject: [PATCH 065/120] Add gemini_perfect_context --- benchmark/gemini_find_retrieve_answer.ipynb | 4 +- benchmark/gemini_perfect_context.ipynb | 1361 +++++++++++++++++++ 2 files changed, 1363 insertions(+), 2 deletions(-) create mode 100644 benchmark/gemini_perfect_context.ipynb diff --git a/benchmark/gemini_find_retrieve_answer.ipynb b/benchmark/gemini_find_retrieve_answer.ipynb index b0fd05c..0eb12a1 100644 --- a/benchmark/gemini_find_retrieve_answer.ipynb +++ b/benchmark/gemini_find_retrieve_answer.ipynb @@ -180,8 +180,8 @@ "SYSTEM_PROMPT = \"\"\"\n", "You are given an input document and a question.\n", 
"You can only answer the question based on the information in the document.\n", - "You will return a JSON name with one keys: \"answer\".\n", - "In `\"answer\"`, you will return the answer one of the following JSON types:\n", + "You will return a JSON name with one key: \"answer\".\n", + "In `\"answer\"`, you will return the answer in one of the following JSON contents:\n", "- Yes/No (for boolean questions)\n", "Is the model an LLM?\n", "{\n", diff --git a/benchmark/gemini_perfect_context.ipynb b/benchmark/gemini_perfect_context.ipynb new file mode 100644 index 0000000..ad4bd94 --- /dev/null +++ b/benchmark/gemini_perfect_context.ipynb @@ -0,0 +1,1361 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oD2lVadPlyhR" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JZeb4ABvlyhS" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77aI1i7vlyhS" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "atlPXFshlyhS" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "QrgOGtuGlyhT", + "outputId": "62d28154-d186-4417-b032-6701fd174ecd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-e3shdxjv\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-e3shdxjv\n", + " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", + " Switched to a new 
branch '5-add-benchmark'\n", + " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit d125b79bb7bfdeab751f93bac37039950fe24ce5\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting fire (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading fire-0.7.0.tar.gz (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (0.27.1)\n", + "Collecting llama-cpp-python (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading llama_cpp_python-0.3.6.tar.gz (66.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting loguru (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (2.10.5)\n", + "Collecting pymupdf4llm (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (6.0.2)\n", + "Collecting streamlit (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", + "Collecting unsloth (from structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading unsloth-2025.1.6-py3-none-any.whl.metadata (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.7/53.7 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev56+gd125b79) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.12.2)\n", + "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (1.26.4)\n", + "Collecting diskcache>=5.6.1 (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.1.5)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (8.1.8)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.25.5)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (6.3.3)\n", + "Collecting unsloth_zoo>=2025.1.4 (from 
unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading unsloth_zoo-2025.1.5-py3-none-any.whl.metadata (16 kB)\n", + "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.5.1+cu121)\n", + "Collecting xformers>=0.0.27.post2 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n", + "Collecting bitsandbytes (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)\n", + "Requirement already satisfied: triton>=3.0.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.1.0)\n", + "Collecting tyro (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading tyro-0.9.12-py3-none-any.whl.metadata (9.4 kB)\n", + "Requirement already satisfied: transformers!=4.47.0,>=4.46.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.47.1)\n", + "Collecting datasets>=2.16.0 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: sentencepiece>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.0)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (5.9.5)\n", + "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.45.1)\n", + "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.2.1)\n", + "Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from 
unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading trl-0.13.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.14.0)\n", + "Collecting protobuf<6,>=3.20 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)\n", + "Collecting hf_transfer (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from accelerate>=0.34.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.5.2)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.21.1)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", + "Collecting xxhash (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", + "Collecting multiprocess<0.70.17 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", + "Collecting fsspec>=2023.5.0 (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from 
datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.11.11)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.0.12)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2>=2.11.3->llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.0.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from 
rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.18.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.4.2)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.0.106)\n", + "Requirement already satisfied: 
nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.13.1)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.6.85)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2024.11.6)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.21.0)\n", + "Collecting cut_cross_entropy (from unsloth_zoo>=2025.1.4->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)\n", + "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.16)\n", + "Collecting shtab>=1.5.6 (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", + " Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n", + "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.4.1)\n", + "Requirement already 
satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.18.3)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.0.2)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages 
(from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.17.0)\n", + "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m288.3 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", + "Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m84.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading unsloth-2025.1.6-py3-none-any.whl (175 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m175.5/175.5 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hDownloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.1/162.1 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m89.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m79.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading trl-0.13.0-py3-none-any.whl (293 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.4/293.4 kB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading unsloth_zoo-2025.1.5-py3-none-any.whl (80 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.2/80.2 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl (15.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.3/15.3 MB\u001b[0m \u001b[31m86.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.1/69.1 MB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m 
eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m83.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tyro-0.9.12-py3-none-any.whl (115 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.7/115.7 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\n", + "Downloading cut_cross_entropy-25.1.1-py3-none-any.whl (22 kB)\n", + "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire, llama-cpp-python\n", + " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev56+gd125b79-py3-none-any.whl size=16254 sha256=44ec8b803647a36e38d429c99dca9a41465cabf732b89ee30d835b5bf7ef397a\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-7rr3qwwf/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", + " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=ae95bde960b4f7822e181432b7e15ac481d7e46da0b22e2e1070da6763218641\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + " Building wheel for llama-cpp-python (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.6-cp311-cp311-linux_x86_64.whl size=4022116 sha256=7fdfa0bf98f5cf71204e7497d168a59fa164e54a9c7adecfd4bef61dd1277b44\n", + " Stored in directory: /root/.cache/pip/wheels/e8/96/d2/acfb576f7a58ef0580e2fec8096e5eefd17cc356017089337b\n", + "Successfully built structured-qa fire llama-cpp-python\n", + "Installing collected packages: xxhash, watchdog, shtab, pymupdf, protobuf, loguru, hf_transfer, fsspec, fire, diskcache, dill, pymupdf4llm, pydeck, multiprocess, llama-cpp-python, tyro, xformers, datasets, cut_cross_entropy, bitsandbytes, trl, streamlit, unsloth_zoo, unsloth, structured-qa\n", + " Attempting uninstall: protobuf\n", + " Found existing installation: protobuf 4.25.5\n", + " Uninstalling protobuf-4.25.5:\n", + " Successfully uninstalled protobuf-4.25.5\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2024.10.0\n", + " Uninstalling fsspec-2024.10.0:\n", + " Successfully uninstalled fsspec-2024.10.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "grpcio-status 1.62.3 requires protobuf>=4.21.6, but you have protobuf 3.20.3 which is incompatible.\n", + "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\n", + "tensorflow-metadata 1.16.1 requires protobuf<6.0.0dev,>=4.25.2; python_version >= \"3.11\", but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed bitsandbytes-0.45.0 cut_cross_entropy-25.1.1 datasets-3.2.0 dill-0.3.8 diskcache-5.6.3 fire-0.7.0 fsspec-2024.9.0 hf_transfer-0.1.9 llama-cpp-python-0.3.6 loguru-0.7.3 multiprocess-0.70.16 protobuf-3.20.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 shtab-1.7.1 streamlit-1.41.1 structured-qa-0.3.3.dev56+gd125b79 trl-0.13.0 tyro-0.9.12 unsloth-2025.1.6 unsloth_zoo-2025.1.5 watchdog-6.0.0 xformers-0.0.29.post1 xxhash-3.5.0\n" + ] + }, + { + "data": { + "application/vnd.colab-display-data+json": { + "id": "6b04f797f18f4c54948485ed45a8dacd", + "pip_warning": { + "packages": [ + "google" + ] + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S22kTrfPlyhU", + "outputId": "f77e1fb9-837a-4674-85f4-bc4e54815ed0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2025-01-23 10:00:12-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 14711 (14K) [text/plain]\n", + "Saving to: ‘structured_qa.csv’\n", + "\n", + "structured_qa.csv 100%[===================>] 14.37K --.-KB/s in 0.003s \n", + "\n", + "2025-01-23 10:00:19 (5.28 MB/s) - ‘structured_qa.csv’ saved [14711/14711]\n", + "\n" + ] + } + ], + "source": [ + "%pip install structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "\n", + "GEMINI_API_KEY = None\n", + "if not GEMINI_API_KEY:\n", + " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", + "genai.configure(api_key=GEMINI_API_KEY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process all questions for a single Section" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + "import json\n", + "import time\n", + "\n", + "\n", + "def process_section_questions(\n", + " section_file,\n", + " section_data,\n", + " model,\n", + "):\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in section_data.iterrows():\n", + " if model.n > 0 and model.n % 10 == 0:\n", + " logger.info(\"Waiting for 60 seconds\")\n", + " time.sleep(60)\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " response = model.model.generate_content([section_file.read_text(), question])\n", + " logger.info(response.text)\n", + " response_json = json.loads(response.text)\n", + " answers[index] 
= response_json[\"answer\"]\n", + " sections[index] = response_json[\"section\"]\n", + " model.n += 1\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "whtSJwdrlyhZ" + }, + "outputs": [], + "source": [ + "SYSTEM_PROMPT = \"\"\"\n", + "You are given an input document and a question.\n", + "You can only answer the question based on the information in the document.\n", + "You will return a JSON name with one key: \"answer\".\n", + "In `\"answer\"`, you will return the answer in one of the following JSON contents:\n", + "- Yes/No (for boolean questions)\n", + "Is the model an LLM?\n", + "{\n", + " \"answer\": \"No\"\n", + "}\n", + "- Single number (for numeric questions)\n", + "How many layers does the model have?\n", + "{\n", + " \"answer\": 12\n", + "}\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", + "{\n", + " \"answer\": \"C\"\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "ObsvwlNslyhZ" + }, + "outputs": [], + "source": [ + "model = load_gemini_model(\n", + " \"gemini-2.0-flash-exp\",\n", + " system_prompt=SYSTEM_PROMPT,\n", + " generation_config={\n", + " \"response_mime_type\": \"application/json\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W97jWzzOlyhZ" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "AZBwRnfjlyhZ", + "outputId": "1cea7c23-9edf-45dc-d58d-09ca5b41d193" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-01-23 10:03:52.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:52.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:52.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:56.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:03:56.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:03.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3 Model Architecture\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:03.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:10.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:10.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:14.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:14.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:21.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.2.2 Multi-Head Attention\",\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:21.202\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:28.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.4 Embeddings and Softmax\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:28.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:35.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"6.2 Model Variations\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:35.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:42.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.2 Hardware and Schedule\",\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:42.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:47.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.2 Hardware and Schedule\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:47.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:51.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.3 Optimizer\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:04:51.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:05:51.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-01-23 10:05:55.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.3 Optimizer\",\n", + " \"answer\": 4000\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:05:55.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:00.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
Training\",\n", + " \"answer\": \"0.1\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:00.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685v2.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:01.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:04.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:04.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:14.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. OUR METHOD\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:14.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How much memory is saved (in GB) when training GPT-3 175B with LoRA compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:22.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. 
Practical Benefits and Limitations.\",\n", + " \"answer\": 0.85\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:22.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:30.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"ABSTRACT\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:30.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:42.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"1. 
INTRODUCTION\",\n", + "\"answer\": \"175\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:42.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:52.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4.1 Low-Rank-Parametrized Update Matrices\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:52.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:53.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:56.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:06:56.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:04.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3 Arithmetic Reasoning\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:04.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many large language models 
were evaluated?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:13.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"3.1 Experimental Setup\",\n", + "\"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:13.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:21.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"3.1 Experimental Setup\",\n", + " \"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:21.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:31.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"5 Symbolic Reasoning\",\n", + "\"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:31.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:41.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"3.4 Robustness of Chain of Thought\",\n", + "\"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:41.105\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many random samples for examined to understand model errors?\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:50.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.2 Results\",\n", + " \"answer\": \"50\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:07:50.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:01.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5 Symbolic Reasoning\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:01.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:01.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:05.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:05.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:09.436\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Experimental Results\",\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:09.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the model use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:13.038\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2. Decision Tree Analysis of Neural Networks\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:13.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parameters are in the y = x^2 toy model tree?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:16.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Table 1. Computation and memory analysis of toy problems.\",\n", + " \"answer\": \"39\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:16.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:20.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2.4. 
Recurrent Networks\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:20.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:23.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Experimental Results\",\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:23.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:27.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. 
Conclusion\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:27.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:28.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:32.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:32.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:44.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1. Introduction\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:44.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:58.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:08:58.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is the term of office for members of the advisory board of the Authors Alliance? -A: The source does not specify a term of office for the advisory board. -B: 2 years -C: 4 years\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:09.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"5. Acknowledgements\",\n", + "\"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:09.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:23.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1. Introduction\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:23.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:34.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:34.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In what year did the Bull and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:49.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Open Access Policies\",\n", + " \"answer\": 2015\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:09:49.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:01.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Chapter 5\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:01.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:02.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:06.198\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:20.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2. SANITARY HOT WATER INSTALLATIONS\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:20.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:37.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1.2.8. GAS DETECTION AND VENTING\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:37.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the daylight factor required for façades with exterior obstructions?\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:53.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. VISUAL COMFORT\",\n", + " \"answer\": \"0.7%\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:10:53.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:07.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"1.1.3. OCCUPATIONAL SAFETY\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:07.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:11.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:15.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:15.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of AI systems are banned by the AI Act? -A: High-risk systems, -B: Manipulative systems, -C: Real-time biometric systems in public spaces\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:43.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 5\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:11:43.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? 
-A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:06.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 10\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:06.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:26.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 51\",\n", + "\"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:26.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:44.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"2. 
(29)\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:12:44.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:02.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 73\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:02.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:20.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 60\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:20.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. 
-B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:37.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 99\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:13:37.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:08.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 56\",\n", + "\"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:08.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:30.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 79\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:14:30.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:15:30.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-23 10:15:51.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 18\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:15:51.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:09.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 65\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:09.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:15.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:18.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:18.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:28.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"OVERVIEW AND GOAL\",\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:28.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:38.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"END OF THE GAME\",\n", + " \"answer\": 3\n", + 
"}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:38.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:45.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. CARD AND TILE EFFECTS\",\n", + " \"answer\": \"6\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:45.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:54.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"Turn overview\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:16:54.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:03.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"CHAPTER OVERVIEW\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:03.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:13.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - 
\u001b[1m{\n", + " \"section\": \"A. Take a Chapter card\",\n", + " \"answer\": \"3\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:13.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:21.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Chapter Overview\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:21.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:29.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"CARD AND TILE COSTS\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:29.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:39.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. 
CARD AND TILE COSTS\",\n", + " \"answer\": 2\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:17:39.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:39.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:48.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. CARD AND TILE EFFECTS\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:48.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:57.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\"section\": \"CARD AND TILE EFFECTS\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:18:57.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:04.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. 
CARD AND TILE EFFECTS\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:04.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:13.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"CONQUERING MIDDLE-EARTH\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:13.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:22.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"END OF THE GAME\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:22.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:30.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"END OF THE GAME\",\n", + " \"answer\": \"7\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:30.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:32.954\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:36.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:36.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:44.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"LOOKOUT PHASE\",\n", + " \"answer\": 4\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:44.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:51.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"LOOKOUT PHASE\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:51.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:58.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"ACTION PHASE\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:19:58.562\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:05.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"EXPEDITION PHASE\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:05.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:12.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME FLOW\",\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:12.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:19.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME FLOW\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:19.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:27.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"9. 
ACTIONS\",\n", + " \"answer\": \"1\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:27.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:36.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"ACTIONS\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:36.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:43.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME END\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-23 10:20:43.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-23 10:21:43.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-23 10:21:50.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME END\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"benchmark/structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + 
"data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for section_name, section_data in data.groupby(\"document\"):\n", + " section_file = Path(f\"benchmark/perfect_context/{section_name}.txt\")\n", + "\n", + " answers, sections = process_section_questions(section_file, section_data, model)\n", + "\n", + " for index in section_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 + }, + "id": "EYYJgWf6lyha", + "outputId": "347014fe-d6e6-4d0c-e094-dc5fcbed295e" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25,\n \"min\": 5,\n \"max\": 74,\n \"num_unique_values\": 6,\n \"samples\": [\n 5,\n 13,\n 74\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"3 Model Architecture\",\n \"3 Experimental Results\",\n \"Natural lighting\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n 
\"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Does the final model use learned positional embeddings?\",\n \"How many parameters are in the y = x^2 toy model tree?\",\n \"What is the daylight factor required for fa\\u00e7ades with exterior obstructions?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"14\",\n \"0.7\",\n \"850\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"39\",\n \"0.7%\",\n \"0.85\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"6.2 Model Variations\",\n \"Table 1. Computation and memory analysis of toy problems.\",\n \"4. VISUAL COMFORT\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
55https://arxiv.org/pdf/1706.037623 Model ArchitectureDoes the final model use learned positional em...NOYES6.2 Model Variations
1313https://arxiv.org/pdf/2210.051893 Experimental ResultsHow many parameters are in the y = x^2 toy mod...1439Table 1. Computation and memory analysis of to...
1818https://arxiv.org/pdf/2106.09685v2.pdf5.5 Scaling Up to GPT-3How much memory is saved (in GB) when training...8500.854. Practical Benefits and Limitations.
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Prohibited AI Practices (Article 5)Which type of AI systems are banned by the AI ...CBArticle 5
3939https://authorsalliance.org/wp-content/uploads...Chapter 5 Where do you want to make your work ...Are Gold Open Access and Green Open Access mut...NOYESChapter 5
7474https://commission.europa.eu/document/download...Natural lightingWhat is the daylight factor required for façad...0.70.7%4. VISUAL COMFORT
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Unnamed: 0 document \\\n", + "5 5 https://arxiv.org/pdf/1706.03762 \n", + "13 13 https://arxiv.org/pdf/2210.05189 \n", + "18 18 https://arxiv.org/pdf/2106.09685v2.pdf \n", + "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "39 39 https://authorsalliance.org/wp-content/uploads... \n", + "74 74 https://commission.europa.eu/document/download... \n", + "\n", + " section \\\n", + "5 3 Model Architecture \n", + "13 3 Experimental Results \n", + "18 5.5 Scaling Up to GPT-3 \n", + "22 Prohibited AI Practices (Article 5) \n", + "39 Chapter 5 Where do you want to make your work ... \n", + "74 Natural lighting \n", + "\n", + " question answer pred_answer \\\n", + "5 Does the final model use learned positional em... NO YES \n", + "13 How many parameters are in the y = x^2 toy mod... 14 39 \n", + "18 How much memory is saved (in GB) when training... 850 0.85 \n", + "22 Which type of AI systems are banned by the AI ... C B \n", + "39 Are Gold Open Access and Green Open Access mut... NO YES \n", + "74 What is the daylight factor required for façad... 0.7 0.7% \n", + "\n", + " pred_section \n", + "5 6.2 Model Variations \n", + "13 Table 1. Computation and memory analysis of to... \n", + "18 4. Practical Benefits and Limitations. \n", + "22 Article 5 \n", + "39 Chapter 5 \n", + "74 4. 
VISUAL COMFORT " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wfz1XQDLlyha", + "outputId": "36597dcf-ea15-414d-d66f-a4cb9102c4da" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9210526315789473" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rjMNQp8-sZn9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 308ab91622824687ea228f454afe7716f59bc0d1 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 27 Jan 2025 16:53:40 +0100 Subject: [PATCH 066/120] Update --- benchmark/gemini_perfect_context.ipynb | 4 ++-- pyproject.toml | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/benchmark/gemini_perfect_context.ipynb b/benchmark/gemini_perfect_context.ipynb index ad4bd94..e978c17 100644 --- a/benchmark/gemini_perfect_context.ipynb +++ b/benchmark/gemini_perfect_context.ipynb @@ -914,12 +914,12 @@ "\n", "\n", "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"benchmark/structured_qa.csv\")\n", + "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", "data[\"pred_answer\"] = [None] * len(data)\n", "data[\"pred_section\"] = [None] * len(data)\n", "\n", "for section_name, section_data in 
data.groupby(\"document\"):\n", - " section_file = Path(f\"benchmark/perfect_context/{section_name}.txt\")\n", + " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", "\n", " answers, sections = process_section_questions(section_file, section_data, model)\n", "\n", diff --git a/pyproject.toml b/pyproject.toml index ebdabbe..ef10196 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,13 +11,11 @@ dynamic = ["version"] dependencies = [ "fire", "huggingface-hub", - "llama-cpp-python", "loguru", "pydantic", "pymupdf4llm", "pyyaml", "streamlit", - "unsloth", ] [project.optional-dependencies] From 704050b92a21050a48b0c82b58599ac0fede627b Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 27 Jan 2025 17:01:07 +0100 Subject: [PATCH 067/120] fix line --- benchmark/structured_qa.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index b720c48..726202a 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -11,7 +11,7 @@ https://arxiv.org/pdf/1706.03762,5.3 Optimizer,"What optimizer was used for trai https://arxiv.org/pdf/1706.03762,5.3 Optimizer,"How many warmup steps were used?",4000 https://arxiv.org/pdf/1706.03762,5.4 Regularization,"What was the dropout rate used for the base model?",0.1 https://arxiv.org/pdf/2210.05189,2.4 Recurrent Networks,"Can recurrent networks also be converted to decision trees?",YES -https://arxiv.org/pdf/2210.05189,3 Experimental Results"How many layers are in the toy model (y = x^2)?",3 +https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many layers are in the toy model (y = x^2)?",3 https://arxiv.org/pdf/2210.05189,3 Experimental Results,"Does the toy model (y = x^2) use Sigmoid activation function?",NO https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many parameters are in the toy model (y = x^2) tree?",14 https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many layers are in 
the half-moon neural network?",3 From 67b8f804fe141a76957f89f0f882539b07397552 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 27 Jan 2025 17:08:27 +0100 Subject: [PATCH 068/120] fix line --- benchmark/structured_qa.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 726202a..c22680f 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -1,7 +1,7 @@ document,section,question,answer https://arxiv.org/pdf/1706.03762,3 Model Architecture,"What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder",C https://arxiv.org/pdf/1706.03762,3.1 Encoder and Decoder Stacks,"How many layers compose the encoder?",6 -https://arxiv.org/pdf/1706.03762,3.1 Encoder and Decoder Stacks"How many layers compose the decoder?",6 +https://arxiv.org/pdf/1706.03762,3.1 Encoder and Decoder Stacks,"How many layers compose the decoder?",6 https://arxiv.org/pdf/1706.03762,3.2.2 Multi-Head Attention,"How many parallel attention heads are used?",8 https://arxiv.org/pdf/1706.03762,3.4 Embeddings and Softmax,"Does the final model use learned embeddings for the input and output tokens?",YES https://arxiv.org/pdf/1706.03762,3.5 Positional Encoding,"Does the final model use learned positional embeddings?",NO From a6bfe34c1e69db1dca917dec7c24db2f80539944 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 28 Jan 2025 14:41:28 +0100 Subject: [PATCH 069/120] Update perfect_context --- benchmark/gemini_perfect_context.ipynb | 282 +----------------- .../5.5 SCALING UP TO GPT-3 175B.txt | 11 - benchmark/structured_qa.csv | 1 - 3 files changed, 15 insertions(+), 279 deletions(-) delete mode 100644 benchmark/perfect_context/5.5 SCALING UP TO GPT-3 175B.txt diff --git a/benchmark/gemini_perfect_context.ipynb b/benchmark/gemini_perfect_context.ipynb index e978c17..d585f80 100644 --- a/benchmark/gemini_perfect_context.ipynb +++ b/benchmark/gemini_perfect_context.ipynb @@ 
-38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -47,250 +47,14 @@ "id": "QrgOGtuGlyhT", "outputId": "62d28154-d186-4417-b032-6701fd174ecd" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-e3shdxjv\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-e3shdxjv\n", - " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", - " Switched to a new branch '5-add-benchmark'\n", - " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit d125b79bb7bfdeab751f93bac37039950fe24ce5\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting fire (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading fire-0.7.0.tar.gz (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (0.27.1)\n", - "Collecting llama-cpp-python (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading llama_cpp_python-0.3.6.tar.gz (66.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting loguru (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (2.10.5)\n", - "Collecting pymupdf4llm (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (6.0.2)\n", - "Collecting streamlit (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", - "Collecting unsloth (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading unsloth-2025.1.6-py3-none-any.whl.metadata (53 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.7/53.7 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev56+gd125b79) (2.5.0)\n", - "Requirement already satisfied: 
filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.16.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.12.2)\n", - "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (1.26.4)\n", - "Collecting diskcache>=5.6.1 (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n", - "Requirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.1.5)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (2.27.2)\n", - "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", - "Requirement 
already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (8.1.8)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.25.5)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.10.2)\n", - "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.1.44)\n", - "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (6.3.3)\n", - "Collecting unsloth_zoo>=2025.1.4 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading unsloth_zoo-2025.1.5-py3-none-any.whl.metadata (16 kB)\n", - "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.5.1+cu121)\n", - "Collecting xformers>=0.0.27.post2 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n", - "Collecting bitsandbytes (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)\n", - "Requirement already satisfied: triton>=3.0.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.1.0)\n", - "Collecting tyro (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading tyro-0.9.12-py3-none-any.whl.metadata (9.4 kB)\n", - "Requirement already satisfied: transformers!=4.47.0,>=4.46.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.47.1)\n", - "Collecting datasets>=2.16.0 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", - "Requirement already satisfied: sentencepiece>=0.2.0 in 
/usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.0)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (5.9.5)\n", - "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.45.1)\n", - "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.2.1)\n", - "Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading trl-0.13.0-py3-none-any.whl.metadata (11 kB)\n", - "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.14.0)\n", - "Collecting protobuf<6,>=3.20 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)\n", - "Collecting hf_transfer (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n", - "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from accelerate>=0.34.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.5.2)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.21.1)\n", - "Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", - "Collecting xxhash 
(from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", - "Collecting multiprocess<0.70.17 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", - "Collecting fsspec>=2023.5.0 (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.11.11)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.0.12)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2>=2.11.3->llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.0.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.10)\n", - "Requirement already 
satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.18.0)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.4.2)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (9.1.0.70)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.3.1)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) 
(11.0.2.54)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.21.5)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.13.1)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.6.85)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.0)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2024.11.6)\n", - "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.21.0)\n", - "Collecting cut_cross_entropy (from 
unsloth_zoo>=2025.1.4->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)\n", - "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.16)\n", - "Collecting shtab>=1.5.6 (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n", - "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.4.1)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.4.4)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.2)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (24.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.5.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (6.1.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.18.3)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 
in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.0.2)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.35.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.17.0)\n", - "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m288.3 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", - "Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m84.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading unsloth-2025.1.6-py3-none-any.whl (175 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m175.5/175.5 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 
kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.1/162.1 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m89.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m79.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading trl-0.13.0-py3-none-any.whl (293 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.4/293.4 kB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading unsloth_zoo-2025.1.5-py3-none-any.whl (80 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.2/80.2 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", - 
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl (15.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.3/15.3 MB\u001b[0m \u001b[31m86.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.1/69.1 MB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m83.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tyro-0.9.12-py3-none-any.whl (115 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.7/115.7 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\n", - "Downloading cut_cross_entropy-25.1.1-py3-none-any.whl (22 kB)\n", - "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire, llama-cpp-python\n", - " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev56+gd125b79-py3-none-any.whl size=16254 sha256=44ec8b803647a36e38d429c99dca9a41465cabf732b89ee30d835b5bf7ef397a\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-7rr3qwwf/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", - " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=ae95bde960b4f7822e181432b7e15ac481d7e46da0b22e2e1070da6763218641\n", - " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", - " Building wheel for llama-cpp-python (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.6-cp311-cp311-linux_x86_64.whl size=4022116 sha256=7fdfa0bf98f5cf71204e7497d168a59fa164e54a9c7adecfd4bef61dd1277b44\n", - " Stored in directory: /root/.cache/pip/wheels/e8/96/d2/acfb576f7a58ef0580e2fec8096e5eefd17cc356017089337b\n", - "Successfully built structured-qa fire llama-cpp-python\n", - "Installing collected packages: xxhash, watchdog, shtab, pymupdf, protobuf, loguru, hf_transfer, fsspec, fire, diskcache, dill, pymupdf4llm, pydeck, multiprocess, llama-cpp-python, tyro, xformers, datasets, cut_cross_entropy, bitsandbytes, trl, streamlit, unsloth_zoo, unsloth, structured-qa\n", - " Attempting uninstall: protobuf\n", - " Found existing installation: protobuf 4.25.5\n", - " Uninstalling protobuf-4.25.5:\n", - " Successfully uninstalled protobuf-4.25.5\n", - " Attempting uninstall: fsspec\n", - " Found existing installation: fsspec 2024.10.0\n", - " Uninstalling fsspec-2024.10.0:\n", - " Successfully uninstalled fsspec-2024.10.0\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "grpcio-status 1.62.3 requires protobuf>=4.21.6, but you have protobuf 3.20.3 which is incompatible.\n", - "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\n", - "tensorflow-metadata 1.16.1 requires protobuf<6.0.0dev,>=4.25.2; python_version >= \"3.11\", but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed bitsandbytes-0.45.0 cut_cross_entropy-25.1.1 datasets-3.2.0 dill-0.3.8 diskcache-5.6.3 fire-0.7.0 fsspec-2024.9.0 hf_transfer-0.1.9 llama-cpp-python-0.3.6 loguru-0.7.3 multiprocess-0.70.16 protobuf-3.20.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 shtab-1.7.1 streamlit-1.41.1 structured-qa-0.3.3.dev56+gd125b79 trl-0.13.0 tyro-0.9.12 unsloth-2025.1.6 unsloth_zoo-2025.1.5 watchdog-6.0.0 xformers-0.0.29.post1 xxhash-3.5.0\n" - ] - }, - { - "data": { - "application/vnd.colab-display-data+json": { - "id": "6b04f797f18f4c54948485ed45a8dacd", - "pip_warning": { - "packages": [ - "google" - ] - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -298,27 +62,9 @@ "id": "S22kTrfPlyhU", "outputId": "f77e1fb9-837a-4674-85f4-bc4e54815ed0" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2025-01-23 10:00:12-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... 
connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 14711 (14K) [text/plain]\n", - "Saving to: ‘structured_qa.csv’\n", - "\n", - "structured_qa.csv 100%[===================>] 14.37K --.-KB/s in 0.003s \n", - "\n", - "2025-01-23 10:00:19 (5.28 MB/s) - ‘structured_qa.csv’ saved [14711/14711]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "%pip install structured-qa" + "%pip install -e ./structured-qa" ] }, { @@ -340,12 +86,13 @@ "source": [ "import os\n", "import google.generativeai as genai\n", + "from google.colab.userdata import get, SecretNotFoundError\n", "\n", - "GEMINI_API_KEY = None\n", - "if not GEMINI_API_KEY:\n", - " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", - "genai.configure(api_key=GEMINI_API_KEY)" + "try:\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + "except SecretNotFoundError as e:\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" ] }, { @@ -396,7 +143,7 @@ " logger.info(response.text)\n", " response_json = json.loads(response.text)\n", " answers[index] = response_json[\"answer\"]\n", - " sections[index] = response_json[\"section\"]\n", + " sections[index] = None\n", " model.n += 1\n", " return answers, sections" ] @@ -464,7 +211,8 @@ " generation_config={\n", " \"response_mime_type\": \"application/json\",\n", " },\n", - ")" + ")\n", + "model.n = 0" ] }, { @@ -918,7 +666,7 @@ "data[\"pred_answer\"] = [None] * len(data)\n", "data[\"pred_section\"] = [None] * len(data)\n", "\n", - "for section_name, section_data in data.groupby(\"document\"):\n", + "for section_name, section_data in data.groupby(\"section\"):\n", " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", "\n", " answers, sections = process_section_questions(section_file, section_data, model)\n", diff --git 
a/benchmark/perfect_context/5.5 SCALING UP TO GPT-3 175B.txt b/benchmark/perfect_context/5.5 SCALING UP TO GPT-3 175B.txt deleted file mode 100644 index 7d2962a..0000000 --- a/benchmark/perfect_context/5.5 SCALING UP TO GPT-3 175B.txt +++ /dev/null @@ -1,11 +0,0 @@ -As a final stress test for LoRA, we scale up to GPT-3 with 175 billion parameters. Due to the high -training cost, we only report the typical standard deviation for a given task over random seeds, as -opposed to providing one for every entry. See Section D.4 for details on the hyperparameters used. -As shown in Table 4, LoRA matches or exceeds the fine-tuning baseline on all three datasets. Note -that not all methods benefit monotonically from having more trainable parameters, as shown in Fig- -ure 2. We observe a significant performance drop when we use more than 256 special tokens for -prefix-embedding tuning or more than 32 special tokens for prefix-layer tuning. This corroborates -similar observations in Li & Liang (2021). While a thorough investigation into this phenomenon -is out-of-scope for this work, we suspect that having more special tokens causes the input distri- -bution to shift further away from the pre-training data distribution. Separately, we investigate the -performance of different adaptation approaches in the low-data regime in Section F.3 diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index c22680f..92707c2 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -17,7 +17,6 @@ https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many parameters are https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many layers are in the half-moon neural network?",3 https://arxiv.org/pdf/2210.05189,3 Experimental Results,"What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy",B https://arxiv.org/pdf/2106.09685.pdf,4 OUR METHOD,Does LoRA work with any neural network containing dense layers?,YES -https://arxiv.org/pdf/2106.09685.pdf,5.5 SCALING UP TO GPT-3 175B,"How much memory is saved (in GB) when training GPT-3 175B with LoRA compared to full fine-tuning?",850 https://arxiv.org/pdf/2106.09685.pdf,LORA ABSTRACT,"By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x",C https://arxiv.org/pdf/2106.09685.pdf,1 INTRODUCTION,"In billions, how many trainable parameters does GPT-3 have?",175 https://arxiv.org/pdf/2106.09685.pdf,1 INTRODUCTION,Does LoRA introduce additional inference latency compared to full fine-tuning?,NO From 39a17ae98902cccfc26816171f3c10e775f355f1 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 28 Jan 2025 15:00:22 +0100 Subject: [PATCH 070/120] Add missing perfect context --- benchmark/perfect_context/Penalties.txt | 57 +++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/benchmark/perfect_context/Penalties.txt b/benchmark/perfect_context/Penalties.txt index e69de29..62633ff 100644 --- a/benchmark/perfect_context/Penalties.txt +++ b/benchmark/perfect_context/Penalties.txt @@ -0,0 +1,57 @@ +1. In accordance with the terms and conditions laid down in this Regulation, Member States shall lay down the rules on +penalties and other enforcement measures, which may also include warnings and non-monetary measures, applicable to +infringements of this Regulation by operators, and shall take all measures necessary to ensure that they are properly and +effectively implemented, thereby taking into account the guidelines issued by the Commission pursuant to Article 96. The +penalties provided for shall be effective, proportionate and dissuasive. They shall take into account the interests of SMEs, +including start-ups, and their economic viability. +2. 
The Member States shall, without delay and at the latest by the date of entry into application, notify the Commission +of the rules on penalties and of other enforcement measures referred to in paragraph 1, and shall notify it, without delay, of +any subsequent amendment to them. +3. Non-compliance with the prohibition of the AI practices referred to in Article 5 shall be subject to administrative +fines of up to EUR 35 000 000 or, if the offender is an undertaking, up to 7 % of its total worldwide annual turnover for the +preceding financial year, whichever is higher. +4. Non-compliance with any of the following provisions related to operators or notified bodies, other than those laid +down in Articles 5, shall be subject to administrative fines of up to EUR 15 000 000 or, if the offender is an undertaking, up +to 3 % of its total worldwide annual turnover for the preceding financial year, whichever is higher: +(a) obligations of providers pursuant to Article 16; +(b) obligations of authorised representatives pursuant to Article 22; +(c) obligations of importers pursuant to Article 23; +(d) obligations of distributors pursuant to Article 24; +(e) obligations of deployers pursuant to Article 26; +(f) requirements and obligations of notified bodies pursuant to Article 31, Article 33(1), (3) and (4) or Article 34; +(g) transparency obligations for providers and deployers pursuant to Article 50. +5. The supply of incorrect, incomplete or misleading information to notified bodies or national competent authorities in +reply to a request shall be subject to administrative fines of up to EUR 7 500 000 or, if the offender is an undertaking, up to +1 % of its total worldwide annual turnover for the preceding financial year, whichever is higher. +6. In the case of SMEs, including start-ups, each fine referred to in this Article shall be up to the percentages or amount +referred to in paragraphs 3, 4 and 5, whichever thereof is lower. +7. 
When deciding whether to impose an administrative fine and when deciding on the amount of the administrative fine +in each individual case, all relevant circumstances of the specific situation shall be taken into account and, as appropriate, +regard shall be given to the following: +(a) the nature, gravity and duration of the infringement and of its consequences, taking into account the purpose of the AI +system, as well as, where appropriate, the number of affected persons and the level of damage suffered by them; +(b) whether administrative fines have already been applied by other market surveillance authorities to the same operator for +the same infringement; +(c) whether administrative fines have already been applied by other authorities to the same operator for infringements of +other Union or national law, when such infringements result from the same activity or omission constituting a relevant +infringement of this Regulation; +(d) the size, the annual turnover and market share of the operator committing the infringement; +(e) any other aggravating or mitigating factor applicable to the circumstances of the case, such as financial benefits gained, +or losses avoided, directly or indirectly, from the infringement; +(f) the degree of cooperation with the national competent authorities, in order to remedy the infringement and mitigate the +possible adverse effects of the infringement; +(g) the degree of responsibility of the operator taking into account the technical and organisational measures implemented +by it; +(h) the manner in which the infringement became known to the national competent authorities, in particular whether, and +if so to what extent, the operator notified the infringement; +(i) the intentional or negligent character of the infringement; +(j) any action taken by the operator to mitigate the harm suffered by the affected persons. +8. 
Each Member State shall lay down rules on to what extent administrative fines may be imposed on public authorities +and bodies established in that Member State. +9. Depending on the legal system of the Member States, the rules on administrative fines may be applied in such +a manner that the fines are imposed by competent national courts or by other bodies, as applicable in those Member States. +The application of such rules in those Member States shall have an equivalent effect. +10. The exercise of powers under this Article shall be subject to appropriate procedural safeguards in accordance with +Union and national law, including effective judicial remedies and due process. +11. Member States shall, on an annual basis, report to the Commission about the administrative fines they have issued +during that year, in accordance with this Article, and about any related litigation or judicial proceedings \ No newline at end of file From ae325d3fd34f87be6ec8ca17d9b56a9a96c983fd Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 28 Jan 2025 15:04:05 +0100 Subject: [PATCH 071/120] Updates --- benchmark/gemini_full_context.ipynb | 11 ++++++----- benchmark/structured_qa.csv | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index 520f7d2..bc9ee6f 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -340,12 +340,13 @@ "source": [ "import os\n", "import google.generativeai as genai\n", + "from google.colab.userdata import get, SecretNotFoundError\n", "\n", - "GEMINI_API_KEY = None\n", - "if not GEMINI_API_KEY:\n", - " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", - "genai.configure(api_key=GEMINI_API_KEY)" + "try:\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + "except SecretNotFoundError as e:\n", + " raise RuntimeError(\"Please set the 
GOOGLE_API_KEY secret to your API key\") from e\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" ] }, { diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 92707c2..b942988 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -43,7 +43,7 @@ https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,Is symbolic reasoning usua https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4,B https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles",A https://arxiv.org/pdf/2201.11903,3.4 Robustness of Chain of Thought,How many annotators provided independent chains of thought?,3 -https://arxiv.org/pdf/2201.11903,3.2 Results,How many random samples for examined to understand model errors?,50 +https://arxiv.org/pdf/2201.11903,3.2 Results,How many random samples were examined to understand model performance?,100 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,OVERVIEW AND GOAL,How many chapters does the game last?,3 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,OVERVIEW AND GOAL,How many victory conditions are there?,3 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,How many different races are there?,6 @@ -74,7 +74,7 @@ https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rule https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,GAME END,"If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie",A https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,2.1. Toilets,"Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold",B https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,CARBON MONOXIDE DETECTION AND VENTING,"In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground",C -https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,4.1. Natural lighting,"What percentage is the daylight factor required for façades with exterior obstructions?",0.7 +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,4.1. Natural lighting,"What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%",A https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,1.2.1. Internal partitions and doors,"What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90",A https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,5.2. Thread Hierarchy,"What is the maximum number of threads within a thread block?",1024 https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,5.2. 
Thread Hierarchy,"Can you identify a thread with a four-dimensional index?",NO From 56d8620e3296d6c1fb6b77aff7262c0d699619e7 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 28 Jan 2025 16:07:50 +0100 Subject: [PATCH 072/120] Update gemini_ragatouille --- benchmark/gemini_RAGatouille.ipynb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmark/gemini_RAGatouille.ipynb b/benchmark/gemini_RAGatouille.ipynb index 672520b..9fae413 100644 --- a/benchmark/gemini_RAGatouille.ipynb +++ b/benchmark/gemini_RAGatouille.ipynb @@ -657,12 +657,13 @@ "source": [ "import os\n", "import google.generativeai as genai\n", + "from google.colab.userdata import get, SecretNotFoundError\n", "\n", - "GEMINI_API_KEY = \"AIzaSyDxFKx8bdm3sAhQsy04jy2OPNl_tLZiKlY\"\n", - "if not GEMINI_API_KEY:\n", - " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", - "genai.configure(api_key=GEMINI_API_KEY)" + "try:\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + "except SecretNotFoundError as e:\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" ] }, { @@ -822,7 +823,8 @@ " generation_config={\n", " \"response_mime_type\": \"application/json\",\n", " },\n", - ")" + ")\n", + "model.n = 0" ] }, { @@ -1722,8 +1724,6 @@ "\n", "import pandas as pd\n", "\n", - "model.n = 0\n", - "\n", "logger.info(\"Loading input data\")\n", "data = pd.read_csv(\"structured_qa.csv\")\n", "data[\"pred_answer\"] = [None] * len(data)\n", From eb00902402451ea15241e42b2ae4ba3b9033d3c8 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 28 Jan 2025 16:10:12 +0100 Subject: [PATCH 073/120] Update gemini_fra --- benchmark/gemini_find_retrieve_answer.ipynb | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/benchmark/gemini_find_retrieve_answer.ipynb b/benchmark/gemini_find_retrieve_answer.ipynb index 
0eb12a1..6c9746e 100644 --- a/benchmark/gemini_find_retrieve_answer.ipynb +++ b/benchmark/gemini_find_retrieve_answer.ipynb @@ -61,12 +61,13 @@ "source": [ "import os\n", "import google.generativeai as genai\n", + "from google.colab.userdata import get, SecretNotFoundError\n", "\n", - "GEMINI_API_KEY = None\n", - "if not GEMINI_API_KEY:\n", - " raise ValueError(\"Please set the GEMINI_API_KEY variable to your API key\")\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n", - "genai.configure(api_key=GEMINI_API_KEY)" + "try:\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + "except SecretNotFoundError as e:\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" ] }, { From 1d06d2cb517eb9762da6f636a88aed64be40116e Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 28 Jan 2025 16:16:05 +0100 Subject: [PATCH 074/120] Update --- benchmark/gemini_RAGatouille.ipynb | 4 ++-- benchmark/gemini_find_retrieve_answer.ipynb | 4 ++-- benchmark/gemini_full_context.ipynb | 4 ++-- benchmark/gemini_perfect_context.ipynb | 4 ++-- benchmark/perfect_context/Penalties.txt | 2 +- src/structured_qa/workflow.py | 6 ++---- 6 files changed, 11 insertions(+), 13 deletions(-) diff --git a/benchmark/gemini_RAGatouille.ipynb b/benchmark/gemini_RAGatouille.ipynb index 9fae413..c30fcf9 100644 --- a/benchmark/gemini_RAGatouille.ipynb +++ b/benchmark/gemini_RAGatouille.ipynb @@ -660,9 +660,9 @@ "from google.colab.userdata import get, SecretNotFoundError\n", "\n", "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" ] }, diff --git a/benchmark/gemini_find_retrieve_answer.ipynb 
b/benchmark/gemini_find_retrieve_answer.ipynb index 6c9746e..4d666fa 100644 --- a/benchmark/gemini_find_retrieve_answer.ipynb +++ b/benchmark/gemini_find_retrieve_answer.ipynb @@ -64,9 +64,9 @@ "from google.colab.userdata import get, SecretNotFoundError\n", "\n", "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" ] }, diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index bc9ee6f..73a76f0 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -343,9 +343,9 @@ "from google.colab.userdata import get, SecretNotFoundError\n", "\n", "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" ] }, diff --git a/benchmark/gemini_perfect_context.ipynb b/benchmark/gemini_perfect_context.ipynb index d585f80..a221b2d 100644 --- a/benchmark/gemini_perfect_context.ipynb +++ b/benchmark/gemini_perfect_context.ipynb @@ -89,9 +89,9 @@ "from google.colab.userdata import get, SecretNotFoundError\n", "\n", "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", 
"os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" ] }, diff --git a/benchmark/perfect_context/Penalties.txt b/benchmark/perfect_context/Penalties.txt index 62633ff..bfb9d1c 100644 --- a/benchmark/perfect_context/Penalties.txt +++ b/benchmark/perfect_context/Penalties.txt @@ -54,4 +54,4 @@ The application of such rules in those Member States shall have an equivalent ef 10. The exercise of powers under this Article shall be subject to appropriate procedural safeguards in accordance with Union and national law, including effective judicial remedies and due process. 11. Member States shall, on an annual basis, report to the Commission about the administrative fines they have issued -during that year, in accordance with this Article, and about any related litigation or judicial proceedings \ No newline at end of file +during that year, in accordance with this Article, and about any related litigation or judicial proceedings diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 37f2dae..3b7299e 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -1,13 +1,11 @@ from pathlib import Path - -from llama_cpp import Llama from loguru import logger def find_retrieve_answer( question: str, - model: Llama, + model, sections_dir: str, find_prompt: str, answer_prompt: str, @@ -17,7 +15,7 @@ def find_retrieve_answer( Args: question (str): The question to answer. - model (Llama): The Llama model to use for generating completions. + model: The model to use for generating completions. sections_dir (str): The directory containing the sections. See [`document_to_sections_dir`][structured_qa.preprocessing.document_to_sections_dir]. 
Structure of the sections directory: From 8ac9201529a73398b712bbcb79e3468783b99cfb Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 28 Jan 2025 16:59:48 +0100 Subject: [PATCH 075/120] Update --- benchmark/gemini_find_retrieve_answer.ipynb | 37 +-------------------- 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/benchmark/gemini_find_retrieve_answer.ipynb b/benchmark/gemini_find_retrieve_answer.ipynb index 4d666fa..56b0b5f 100644 --- a/benchmark/gemini_find_retrieve_answer.ipynb +++ b/benchmark/gemini_find_retrieve_answer.ipynb @@ -172,38 +172,6 @@ "from structured_qa.model_loaders import load_gemini_model" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "SYSTEM_PROMPT = \"\"\"\n", - "You are given an input document and a question.\n", - "You can only answer the question based on the information in the document.\n", - "You will return a JSON name with one key: \"answer\".\n", - "In `\"answer\"`, you will return the answer in one of the following JSON contents:\n", - "- Yes/No (for boolean questions)\n", - "Is the model an LLM?\n", - "{\n", - " \"answer\": \"No\"\n", - "}\n", - "- Single number (for numeric questions)\n", - "How many layers does the model have?\n", - "{\n", - " \"answer\": 12\n", - "}\n", - "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model?\n", - "-A: ReLU\n", - "-B: Sigmoid\n", - "-C: Tanh\n", - "{\n", - " \"answer\": \"C\"\n", - "}\n", - "\"\"\"" - ] - }, { "cell_type": "code", "execution_count": null, @@ -212,10 +180,7 @@ "source": [ "model = load_gemini_model(\n", " \"gemini-2.0-flash-exp\",\n", - " system_prompt=SYSTEM_PROMPT,\n", - " generation_config={\n", - " \"response_mime_type\": \"application/json\",\n", - " },\n", + " system_prompt=None\n", ")" ] }, From 035217330494d2edbd5a76bf57bfafe51b2c3178 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 28 Jan 2025 18:54:56 +0100 Subject: [PATCH 076/120] Drop some log 
--- benchmark/gemini_find_retrieve_answer.ipynb | 5 +---- src/structured_qa/workflow.py | 2 -- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/benchmark/gemini_find_retrieve_answer.ipynb b/benchmark/gemini_find_retrieve_answer.ipynb index 56b0b5f..d1a2fba 100644 --- a/benchmark/gemini_find_retrieve_answer.ipynb +++ b/benchmark/gemini_find_retrieve_answer.ipynb @@ -178,10 +178,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = load_gemini_model(\n", - " \"gemini-2.0-flash-exp\",\n", - " system_prompt=None\n", - ")" + "model = load_gemini_model(\"gemini-2.0-flash-exp\", system_prompt=None)" ] }, { diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 3b7299e..9df9f6a 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -74,8 +74,6 @@ def find_retrieve_answer( logger.error(f"Failed to generate completion: {e}") return "Generation Error", sections_checked - logger.info(f"Result: {response}") - if finding_section: response = response.strip() logger.info(f"Retrieving section: {response}") From 0b8e5cf9d2db91af71478a715bfdbba1b36316fa Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 28 Jan 2025 19:50:30 +0100 Subject: [PATCH 077/120] Update --- src/structured_qa/workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 9df9f6a..af76f9f 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -76,7 +76,7 @@ def find_retrieve_answer( if finding_section: response = response.strip() - logger.info(f"Retrieving section: {response}") + logger.debug(f"Retrieving section: {response}") if response in sections_names: section_content = (sections_dir / f"{response}.txt").read_text() current_section = response From e2c54575b44a6f8f2f732ea13bc52621cb754de9 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 29 Jan 2025 10:11:38 +0100 Subject: [PATCH 078/120] Update gemini_perfect_context with results 
--- benchmark/gemini_perfect_context.ipynb | 2279 ++++++++++++------------ 1 file changed, 1185 insertions(+), 1094 deletions(-) diff --git a/benchmark/gemini_perfect_context.ipynb b/benchmark/gemini_perfect_context.ipynb index a221b2d..0352686 100644 --- a/benchmark/gemini_perfect_context.ipynb +++ b/benchmark/gemini_perfect_context.ipynb @@ -1,1109 +1,1200 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "oD2lVadPlyhR" - }, - "source": [ - "# Structured Q&A" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JZeb4ABvlyhS" - }, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "77aI1i7vlyhS" - }, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "atlPXFshlyhS" - }, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oD2lVadPlyhR" + }, + "source": [ + "# Structured Q&A" + ] }, - "id": "QrgOGtuGlyhT", - "outputId": "62d28154-d186-4417-b032-6701fd174ecd" - }, - "outputs": [], - "source": [ - "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + { + "cell_type": "markdown", + "metadata": { + "id": "JZeb4ABvlyhS" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] }, - "id": "S22kTrfPlyhU", - "outputId": "f77e1fb9-837a-4674-85f4-bc4e54815ed0" - }, - "outputs": [], - "source": [ - "%pip install -e ./structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qwHWJEsulyhV" - }, - "source": [ - "# Setup" - ] - }, - { - "cell_type": 
"code", - "execution_count": 10, - "metadata": { - "id": "iJ812u2llyhV" - }, - "outputs": [], - "source": [ - "import os\n", - "import google.generativeai as genai\n", - "from google.colab.userdata import get, SecretNotFoundError\n", - "\n", - "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", - "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from loguru import logger" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MKijHC_ClyhX" - }, - "source": [ - "## Function to Process all questions for a single Section" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "oFU-eYMVlyhX" - }, - "outputs": [], - "source": [ - "import json\n", - "import time\n", - "\n", - "\n", - "def process_section_questions(\n", - " section_file,\n", - " section_data,\n", - " model,\n", - "):\n", - " logger.info(\"Predicting\")\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in section_data.iterrows():\n", - " if model.n > 0 and model.n % 10 == 0:\n", - " logger.info(\"Waiting for 60 seconds\")\n", - " time.sleep(60)\n", - " question = row[\"question\"]\n", - " logger.info(f\"Question: {question}\")\n", - " response = model.model.generate_content([section_file.read_text(), question])\n", - " logger.info(response.text)\n", - " response_json = json.loads(response.text)\n", - " answers[index] = response_json[\"answer\"]\n", - " sections[index] = None\n", - " model.n += 1\n", - " return answers, sections" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VQAof5xtlyhY" - }, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import 
load_gemini_model" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "whtSJwdrlyhZ" - }, - "outputs": [], - "source": [ - "SYSTEM_PROMPT = \"\"\"\n", - "You are given an input document and a question.\n", - "You can only answer the question based on the information in the document.\n", - "You will return a JSON name with one key: \"answer\".\n", - "In `\"answer\"`, you will return the answer in one of the following JSON contents:\n", - "- Yes/No (for boolean questions)\n", - "Is the model an LLM?\n", - "{\n", - " \"answer\": \"No\"\n", - "}\n", - "- Single number (for numeric questions)\n", - "How many layers does the model have?\n", - "{\n", - " \"answer\": 12\n", - "}\n", - "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model? -A: ReLU -B: Sigmoid -C: Tanh\n", - "{\n", - " \"answer\": \"C\"\n", - "}\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "ObsvwlNslyhZ" - }, - "outputs": [], - "source": [ - "model = load_gemini_model(\n", - " \"gemini-2.0-flash-exp\",\n", - " system_prompt=SYSTEM_PROMPT,\n", - " generation_config={\n", - " \"response_mime_type\": \"application/json\",\n", - " },\n", - ")\n", - "model.n = 0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W97jWzzOlyhZ" - }, - "source": [ - "# Run Benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 + { + "cell_type": "markdown", + "metadata": { + "id": "77aI1i7vlyhS" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] }, - "id": "AZBwRnfjlyhZ", - "outputId": "1cea7c23-9edf-45dc-d58d-09ca5b41d193" - }, - "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-01-23 10:03:52.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - 
\u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:52.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:52.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:56.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:56.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:03.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3 Model Architecture\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:03.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:10.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:10.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-01-23 
10:04:14.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:14.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:21.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.2.2 Multi-Head Attention\",\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:21.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:28.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.4 Embeddings and Softmax\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:28.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:35.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"6.2 Model Variations\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:35.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - 
\u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:42.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.2 Hardware and Schedule\",\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:42.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:47.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.2 Hardware and Schedule\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:47.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:51.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.3 Optimizer\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:51.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:05:51.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-01-23 10:05:55.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.3 Optimizer\",\n", - " \"answer\": 4000\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:05:55.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:00.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. 
Training\",\n", - " \"answer\": \"0.1\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:00.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685v2.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:01.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:04.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:04.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:14.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. OUR METHOD\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:14.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How much memory is saved (in GB) when training GPT-3 175B with LoRA compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:22.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. 
Practical Benefits and Limitations.\",\n", - " \"answer\": 0.85\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:22.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:30.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"ABSTRACT\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:30.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:42.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"1. 
INTRODUCTION\",\n", - "\"answer\": \"175\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:42.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:52.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4.1 Low-Rank-Parametrized Update Matrices\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:52.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:53.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:56.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:56.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:04.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3 Arithmetic Reasoning\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:04.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many large language models 
were evaluated?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:13.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"3.1 Experimental Setup\",\n", - "\"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:13.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:21.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"3.1 Experimental Setup\",\n", - " \"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:21.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:31.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"5 Symbolic Reasoning\",\n", - "\"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:31.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:41.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"3.4 Robustness of Chain of Thought\",\n", - "\"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:41.105\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many random samples for examined to understand model errors?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:50.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.2 Results\",\n", - " \"answer\": \"50\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:50.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:01.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5 Symbolic Reasoning\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:01.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:01.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:05.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:05.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:09.436\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. Experimental Results\",\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:09.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the model use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:13.038\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2. Decision Tree Analysis of Neural Networks\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:13.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parameters are in the y = x^2 toy model tree?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:16.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Table 1. Computation and memory analysis of toy problems.\",\n", - " \"answer\": \"39\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:16.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:20.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2.4. 
Recurrent Networks\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:20.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:23.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. Experimental Results\",\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:23.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:27.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. 
Conclusion\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:27.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:28.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:32.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:32.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:44.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"1. Introduction\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:44.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:58.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:58.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is the term of office for members of the advisory board of the Authors Alliance? -A: The source does not specify a term of office for the advisory board. -B: 2 years -C: 4 years\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:09.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"5. Acknowledgements\",\n", - "\"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:09.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:23.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"1. Introduction\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:23.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:34.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:34.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In what year did the Bull and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:49.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. Open Access Policies\",\n", - " \"answer\": 2015\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:49.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:01.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Chapter 5\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:01.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:02.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:06.198\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:20.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2. SANITARY HOT WATER INSTALLATIONS\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:20.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:37.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"1.2.8. GAS DETECTION AND VENTING\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:37.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the daylight factor required for façades with exterior obstructions?\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:53.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. VISUAL COMFORT\",\n", - " \"answer\": \"0.7%\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:53.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:07.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"1.1.3. OCCUPATIONAL SAFETY\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:07.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:11.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:15.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:15.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of AI systems are banned by the AI Act? -A: High-risk systems, -B: Manipulative systems, -C: Real-time biometric systems in public spaces\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:43.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 5\",\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:43.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? 
-A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:06.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 10\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:06.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:26.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 51\",\n", - "\"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:26.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:44.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2. 
(29)\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:44.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:02.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 73\",\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:02.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:20.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 60\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:20.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. 
-B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:37.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 99\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:37.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:08.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 56\",\n", - "\"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:08.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:30.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 79\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:30.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:15:30.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", - "\u001b[32m2025-01-23 10:15:51.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 18\",\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:15:51.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:09.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 65\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:09.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:15.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:18.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:18.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:28.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"OVERVIEW AND GOAL\",\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:28.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:38.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"END OF THE GAME\",\n", - " \"answer\": 3\n", - 
"}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:38.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:45.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. CARD AND TILE EFFECTS\",\n", - " \"answer\": \"6\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:45.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:54.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Turn overview\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:54.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:03.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"CHAPTER OVERVIEW\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:03.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:13.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - 
\u001b[1m{\n", - " \"section\": \"A. Take a Chapter card\",\n", - " \"answer\": \"3\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:13.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:21.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. Chapter Overview\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:21.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:29.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"CARD AND TILE COSTS\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:29.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:39.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. 
CARD AND TILE COSTS\",\n", - " \"answer\": 2\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:39.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:39.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:48.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. CARD AND TILE EFFECTS\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:48.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:57.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"CARD AND TILE EFFECTS\",\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:57.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:04.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. 
CARD AND TILE EFFECTS\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:04.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:13.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"CONQUERING MIDDLE-EARTH\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:13.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:22.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"END OF THE GAME\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:22.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:30.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"END OF THE GAME\",\n", - " \"answer\": \"7\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:30.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:32.954\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:36.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:36.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:44.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"LOOKOUT PHASE\",\n", - " \"answer\": 4\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:44.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:51.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"LOOKOUT PHASE\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:51.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:58.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"ACTION PHASE\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:58.562\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:05.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"EXPEDITION PHASE\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:05.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:12.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME FLOW\",\n", - " \"answer\": 25\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:12.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:19.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME FLOW\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:19.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:27.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"9. 
ACTIONS\",\n", - " \"answer\": \"1\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:27.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:36.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"ACTIONS\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:36.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:43.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME END\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:43.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:21:43.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-23 10:21:50.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME END\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "\n", - "import pandas as pd\n", - "\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", 
- "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for section_name, section_data in data.groupby(\"section\"):\n", - " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", - "\n", - " answers, sections = process_section_questions(section_file, section_data, model)\n", - "\n", - " for index in section_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a9eMkW1-lyha" - }, - "source": [ - "# Results" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 238 + "cell_type": "markdown", + "metadata": { + "id": "atlPXFshlyhS" + }, + "source": [ + "## Installing dependencies" + ] }, - "id": "EYYJgWf6lyha", - "outputId": "347014fe-d6e6-4d0c-e094-dc5fcbed295e" - }, - "outputs": [ { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25,\n \"min\": 5,\n \"max\": 74,\n \"num_unique_values\": 6,\n \"samples\": [\n 5,\n 13,\n 74\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"3 Model Architecture\",\n 
\"3 Experimental Results\",\n \"Natural lighting\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Does the final model use learned positional embeddings?\",\n \"How many parameters are in the y = x^2 toy model tree?\",\n \"What is the daylight factor required for fa\\u00e7ades with exterior obstructions?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"14\",\n \"0.7\",\n \"850\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"39\",\n \"0.7%\",\n \"0.85\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"6.2 Model Variations\",\n \"Table 1. Computation and memory analysis of toy problems.\",\n \"4. VISUAL COMFORT\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QrgOGtuGlyhT", + "outputId": "e5fcc547-6580-4068-d844-28afc1c28d9c" }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
55https://arxiv.org/pdf/1706.037623 Model ArchitectureDoes the final model use learned positional em...NOYES6.2 Model Variations
1313https://arxiv.org/pdf/2210.051893 Experimental ResultsHow many parameters are in the y = x^2 toy mod...1439Table 1. Computation and memory analysis of to...
1818https://arxiv.org/pdf/2106.09685v2.pdf5.5 Scaling Up to GPT-3How much memory is saved (in GB) when training...8500.854. Practical Benefits and Limitations.
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Prohibited AI Practices (Article 5)Which type of AI systems are banned by the AI ...CBArticle 5
3939https://authorsalliance.org/wp-content/uploads...Chapter 5 Where do you want to make your work ...Are Gold Open Access and Green Open Access mut...NOYESChapter 5
7474https://commission.europa.eu/document/download...Natural lightingWhat is the daylight factor required for façad...0.70.7%4. VISUAL COMFORT
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'structured-qa'...\n", + "remote: Enumerating objects: 724, done.\u001b[K\n", + "remote: Counting objects: 100% (162/162), done.\u001b[K\n", + "remote: Compressing objects: 100% (101/101), done.\u001b[K\n", + "remote: Total 724 (delta 100), reused 74 (delta 61), pack-reused 562 (from 1)\u001b[K\n", + "Receiving objects: 100% (724/724), 2.23 MiB | 6.64 MiB/s, done.\n", + "Resolving deltas: 100% (382/382), done.\n" + ] + } ], - "text/plain": [ - " Unnamed: 0 document \\\n", - "5 5 https://arxiv.org/pdf/1706.03762 \n", - "13 13 https://arxiv.org/pdf/2210.05189 \n", - "18 18 https://arxiv.org/pdf/2106.09685v2.pdf \n", - "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", - "39 39 https://authorsalliance.org/wp-content/uploads... \n", - "74 74 https://commission.europa.eu/document/download... \n", - "\n", - " section \\\n", - "5 3 Model Architecture \n", - "13 3 Experimental Results \n", - "18 5.5 Scaling Up to GPT-3 \n", - "22 Prohibited AI Practices (Article 5) \n", - "39 Chapter 5 Where do you want to make your work ... \n", - "74 Natural lighting \n", - "\n", - " question answer pred_answer \\\n", - "5 Does the final model use learned positional em... NO YES \n", - "13 How many parameters are in the y = x^2 toy mod... 14 39 \n", - "18 How much memory is saved (in GB) when training... 850 0.85 \n", - "22 Which type of AI systems are banned by the AI ... C B \n", - "39 Are Gold Open Access and Green Open Access mut... NO YES \n", - "74 What is the daylight factor required for façad... 0.7 0.7% \n", - "\n", - " pred_section \n", - "5 6.2 Model Variations \n", - "13 Table 1. Computation and memory analysis of to... \n", - "18 4. Practical Benefits and Limitations. \n", - "22 Article 5 \n", - "39 Chapter 5 \n", - "74 4. 
VISUAL COMFORT " + "source": [ + "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "wfz1XQDLlyha", - "outputId": "36597dcf-ea15-414d-d66f-a4cb9102c4da" - }, - "outputs": [ { - "data": { - "text/plain": [ - "0.9210526315789473" + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S22kTrfPlyhU", + "outputId": "40005814-676a-4abd-8d00-3b86d1e3a3f0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Processing ./structured-qa\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.27.1)\n", + "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (2.10.5)\n", + "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.0.17)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (6.0.2)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (1.41.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev71+gae325d3) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.67.1)\n", + 
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (2.27.2)\n", + "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev71+gae325d3) (1.25.2)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.25.5)\n", + "Requirement already satisfied: pyarrow>=7.0 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.10.2)\n", + "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.0.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.44)\n", + "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.9.1)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.23.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.0.12)\n", + "Requirement already satisfied: 
python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.2)\n", + "Requirement 
already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (24.3.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.36.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.17.0)\n", + "Building wheels for collected packages: structured-qa\n", + " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev71+gae325d3-py3-none-any.whl size=16241 sha256=2defc7ec99afa5814e6713aee2aca5a6364f8b2a2b97e206aa16ccabf91113eb\n", + " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", + "Successfully built structured-qa\n", + "Installing collected packages: structured-qa\n", + " Attempting uninstall: structured-qa\n", + " Found existing installation: structured-qa 0.3.3.dev71+gae325d3\n", + " Uninstalling structured-qa-0.3.3.dev71+gae325d3:\n", + " Successfully uninstalled structured-qa-0.3.3.dev71+gae325d3\n", + "Successfully installed structured-qa-0.3.3.dev71+gae325d3\n" + ] + } + ], + "source": [ + "%pip install ./structured-qa" ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "from google.colab.userdata import get, SecretNotFoundError\n", + "\n", + "try:\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + "except SecretNotFoundError as e:\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "jWlaKC5qXZrh" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process all questions for a single Section" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + "import json\n", + "import time\n", + "\n", + 
"\n", + "def process_section_questions(\n", + " section_file,\n", + " section_data,\n", + " model,\n", + "):\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in section_data.iterrows():\n", + " if model.n > 0 and model.n % 10 == 0:\n", + " logger.info(\"Waiting for 60 seconds\")\n", + " time.sleep(60)\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " response = model.model.generate_content([section_file.read_text(), question])\n", + " logger.info(response.text)\n", + " response_json = json.loads(response.text)\n", + " answers[index] = response_json[\"answer\"]\n", + " sections[index] = None\n", + " model.n += 1\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "6RoEbYj3XZri" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "whtSJwdrlyhZ" + }, + "outputs": [], + "source": [ + "SYSTEM_PROMPT = \"\"\"\n", + "You are given an input document and a question.\n", + "You can only answer the question based on the information in the document.\n", + "You will return a JSON name with one key: \"answer\".\n", + "In `\"answer\"`, you will return the answer in one of the following JSON contents:\n", + "- Yes/No (for boolean questions)\n", + "Is the model an LLM?\n", + "{\n", + " \"answer\": \"No\"\n", + "}\n", + "- Single number (for numeric questions)\n", + "How many layers does the model have?\n", + "{\n", + " \"answer\": 12\n", + "}\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", + "{\n", + " \"answer\": \"C\"\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "ObsvwlNslyhZ" + }, + "outputs": [], + "source": [ + "model = load_gemini_model(\n", + " \"gemini-2.0-flash-exp\",\n", + " system_prompt=SYSTEM_PROMPT,\n", + " generation_config={\n", + " \"response_mime_type\": \"application/json\",\n", + " },\n", + ")\n", + "model.n = 0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W97jWzzOlyhZ" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "AZBwRnfjlyhZ", + "outputId": "e8e05071-0cad-41b5-c3f5-a0437357270d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[32m2025-01-28 14:06:32.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:32.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:32.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:34.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 175\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:34.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency 
compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:36.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:36.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:36.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:38.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:38.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:38.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. 
-C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:40.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:40.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:41.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:41.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:43.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:43.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:44.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:44.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:44.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:46.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:46.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:46.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:48.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:48.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:48.515\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:50.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:50.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:50.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:52.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:52.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:52.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:53.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - 
\u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:53.671\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:53.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:55.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:55.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:56.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:56.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:58.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 14\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:58.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + 
"\u001b[32m2025-01-28 14:08:00.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:00.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:01.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:01.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:01.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:03.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:03.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:03.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:04.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:04.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:06.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:06.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:06.202\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:07.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:07.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:08.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:08.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:08.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:10.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 100\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:10.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:10.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How 
many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:12.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:12.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:12.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:14.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:14.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:14.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:15.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 2\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:15.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:15.829\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:17.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:17.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:17.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:20.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:20.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:20.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:21.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:21.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:21.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:23.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:23.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:23.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:24.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:24.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:26.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:26.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:26.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:27.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:30.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:30.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:30.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:31.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 1024\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:31.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:33.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:33.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:33.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:34.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:34.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:36.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 4000\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:36.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:36.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:38.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 0.1\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:38.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:38.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:40.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:40.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:40.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:42.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:42.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:44.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:44.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:45.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:45.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:45.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:47.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:47.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the 
final round?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:49.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:49.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:49.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:51.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:51.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:51.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:52.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:52.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the 
reserve?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:55.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 2\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:55.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:55.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:56.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:56.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:58.853\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:58.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:12:58.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:02.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:02.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:03.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:03.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:03.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:05.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:05.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:05.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:07.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:07.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:09.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:09.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:10.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:10.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:12.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:12.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:12.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:14.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:14.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:14.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:16.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:16.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:16.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:17.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:17.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:17.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:17.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:19.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:19.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:19.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:20.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:20.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:20.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:22.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:22.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:22.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:24.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:24.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In how many regions do you need to 
be present to win the game?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:25.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 7\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:25.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:25.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:27.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:27.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:27.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:32.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:32.496\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:34.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:34.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:36.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 1\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:36.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:36.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:38.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:38.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:38.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:38.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:40.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:40.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:41.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:41.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:41.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:43.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:43.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:46.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:46.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:46.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:47.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:47.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:47.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:50.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:50.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:54.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:54.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:54.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:56.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 1\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:56.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you 
need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:01.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:01.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:01.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:02.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 4\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:02.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:02.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:04.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:04.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + 
"\u001b[32m2025-01-28 14:17:04.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:06.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:06.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:06.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:11.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:11.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:11.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:14.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:14.583\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:16.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:16.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:16.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:18.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:18.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:18.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:22.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - 
\u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:22.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:22.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:27.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:27.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:32.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:32.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:34.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:34.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:34.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:34.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:36.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:36.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:39.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:39.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 
14:18:39.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:40.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:40.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:40.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:42.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:42.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:45.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:45.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:45.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:47.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:47.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:47.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:49.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + 
"\u001b[32m2025-01-28 14:18:49.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:50.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:50.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:50.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:52.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"2015\"\n", + "}\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for section_name, section_data in data.groupby(\"section\"):\n", + " section_file = Path(\n", + " f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", + "\n", + " answers, sections = 
process_section_questions(section_file, section_data, model)\n", + "\n", + " for index in section_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "id": "EYYJgWf6lyha", + "outputId": "720ca56f-eb28-4770-9ec6-8959e77d27d9" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 document \\\n", + "43 43 https://arxiv.org/pdf/2201.11903 \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "\n", + " section \\\n", + "43 3.4 Robustness of Chain of Thought \n", + "52 CARD AND TILE COSTS \n", + "\n", + " question answer pred_answer \\\n", + "43 How many annotators provided independent chain... 3 2 \n", + "52 Can a player pay coins to compensate for missi... YES NO \n", + "\n", + " pred_section \n", + "43 NaN \n", + "52 NaN " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
4343https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32NaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"results\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 43,\n \"max\": 52,\n \"num_unique_values\": 2,\n \"samples\": [\n 52,\n 43\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"CARD AND TILE COSTS\",\n \"3.4 Robustness of Chain of Thought\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\",\n \"How many annotators provided independent chains of thought?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"YES\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NO\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + 
"metadata": {}, + "execution_count": 14 + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wfz1XQDLlyha", + "outputId": "6417fafc-ffd5-46eb-ab92-8a2a905c4f71" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9797979797979798" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rjMNQp8-sZn9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" } - ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rjMNQp8-sZn9" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" }, - "language_info": { - "name": "python", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 36350eed3b3d47106dea31f6650d580b774c3a2c Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 29 Jan 2025 10:23:11 +0100 Subject: [PATCH 079/120] Use rapizfuzz --- benchmark/gemini_perfect_context.ipynb | 2365 ++++++++++++------------ pyproject.toml | 1 + src/structured_qa/workflow.py | 24 +- 3 files changed, 1198 insertions(+), 1192 deletions(-) diff --git 
a/benchmark/gemini_perfect_context.ipynb b/benchmark/gemini_perfect_context.ipynb index 0352686..dbf1f67 100644 --- a/benchmark/gemini_perfect_context.ipynb +++ b/benchmark/gemini_perfect_context.ipynb @@ -1,1200 +1,1199 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "oD2lVadPlyhR" - }, - "source": [ - "# Structured Q&A" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JZeb4ABvlyhS" - }, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "77aI1i7vlyhS" - }, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "atlPXFshlyhS" - }, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "QrgOGtuGlyhT", - "outputId": "e5fcc547-6580-4068-d844-28afc1c28d9c" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Cloning into 'structured-qa'...\n", - "remote: Enumerating objects: 724, done.\u001b[K\n", - "remote: Counting objects: 100% (162/162), done.\u001b[K\n", - "remote: Compressing objects: 100% (101/101), done.\u001b[K\n", - "remote: Total 724 (delta 100), reused 74 (delta 61), pack-reused 562 (from 1)\u001b[K\n", - "Receiving objects: 100% (724/724), 2.23 MiB | 6.64 MiB/s, done.\n", - "Resolving deltas: 100% (382/382), done.\n" - ] - } - ], - "source": [ - "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "S22kTrfPlyhU", - "outputId": "40005814-676a-4abd-8d00-3b86d1e3a3f0" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Processing ./structured-qa\n", - " Installing build 
dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.27.1)\n", - "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.3)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (2.10.5)\n", - "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.0.17)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (6.0.2)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (1.41.1)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev71+gae325d3) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.32.3)\n", - 
"Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (2.27.2)\n", - "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev71+gae325d3) (1.25.2)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (11.1.0)\n", - "Requirement already satisfied: 
protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.25.5)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.10.2)\n", - "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.0.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.44)\n", - "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.9.1)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.3.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.23.0)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in 
/usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.0.2)\n", - "Requirement already satisfied: 
MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (24.3.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.36.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.17.0)\n", - "Building wheels for collected packages: structured-qa\n", - " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev71+gae325d3-py3-none-any.whl size=16241 sha256=2defc7ec99afa5814e6713aee2aca5a6364f8b2a2b97e206aa16ccabf91113eb\n", - " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", - "Successfully built structured-qa\n", - "Installing collected packages: structured-qa\n", - " Attempting uninstall: structured-qa\n", - " Found existing installation: structured-qa 0.3.3.dev71+gae325d3\n", - " Uninstalling structured-qa-0.3.3.dev71+gae325d3:\n", - " Successfully uninstalled structured-qa-0.3.3.dev71+gae325d3\n", - "Successfully installed structured-qa-0.3.3.dev71+gae325d3\n" - ] - } - ], - "source": [ - "%pip install ./structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qwHWJEsulyhV" - }, - "source": [ - "# Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "iJ812u2llyhV" - }, - "outputs": [], - "source": [ - "import os\n", - "import google.generativeai as genai\n", - "from google.colab.userdata import get, SecretNotFoundError\n", - "\n", - "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", - "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "jWlaKC5qXZrh" - }, - "outputs": [], - "source": [ - "from loguru import logger" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MKijHC_ClyhX" - }, - "source": [ - "## Function to Process all questions for a single Section" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "oFU-eYMVlyhX" - }, - "outputs": [], - "source": [ - "import json\n", - "import time\n", - "\n", - "\n", - "def process_section_questions(\n", - " section_file,\n", - " 
section_data,\n", - " model,\n", - "):\n", - " logger.info(\"Predicting\")\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in section_data.iterrows():\n", - " if model.n > 0 and model.n % 10 == 0:\n", - " logger.info(\"Waiting for 60 seconds\")\n", - " time.sleep(60)\n", - " question = row[\"question\"]\n", - " logger.info(f\"Question: {question}\")\n", - " response = model.model.generate_content([section_file.read_text(), question])\n", - " logger.info(response.text)\n", - " response_json = json.loads(response.text)\n", - " answers[index] = response_json[\"answer\"]\n", - " sections[index] = None\n", - " model.n += 1\n", - " return answers, sections" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VQAof5xtlyhY" - }, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "6RoEbYj3XZri" - }, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_gemini_model" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "whtSJwdrlyhZ" - }, - "outputs": [], - "source": [ - "SYSTEM_PROMPT = \"\"\"\n", - "You are given an input document and a question.\n", - "You can only answer the question based on the information in the document.\n", - "You will return a JSON name with one key: \"answer\".\n", - "In `\"answer\"`, you will return the answer in one of the following JSON contents:\n", - "- Yes/No (for boolean questions)\n", - "Is the model an LLM?\n", - "{\n", - " \"answer\": \"No\"\n", - "}\n", - "- Single number (for numeric questions)\n", - "How many layers does the model have?\n", - "{\n", - " \"answer\": 12\n", - "}\n", - "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", - "{\n", - " \"answer\": \"C\"\n", - "}\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "ObsvwlNslyhZ" - }, - "outputs": [], - "source": [ - "model = load_gemini_model(\n", - " \"gemini-2.0-flash-exp\",\n", - " system_prompt=SYSTEM_PROMPT,\n", - " generation_config={\n", - " \"response_mime_type\": \"application/json\",\n", - " },\n", - ")\n", - "model.n = 0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W97jWzzOlyhZ" - }, - "source": [ - "# Run Benchmark" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oD2lVadPlyhR" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JZeb4ABvlyhS" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77aI1i7vlyhS" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "atlPXFshlyhS" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "QrgOGtuGlyhT", + "outputId": "e5fcc547-6580-4068-d844-28afc1c28d9c" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "AZBwRnfjlyhZ", - "outputId": "e8e05071-0cad-41b5-c3f5-a0437357270d" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\u001b[32m2025-01-28 14:06:32.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:32.808\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:32.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:34.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 175\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:34.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:36.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:36.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:36.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:38.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:38.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:38.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:40.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:40.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:41.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:41.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? 
-A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:43.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:43.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:44.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:44.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:44.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? 
-A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:46.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:46.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:46.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:48.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:48.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:48.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. 
-C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:50.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:50.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:50.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:52.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:52.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:52.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:53.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:53.671\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:53.673\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:55.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:55.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:56.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:56.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:58.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 14\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:58.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:00.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:00.197\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:01.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:01.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:01.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:03.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:03.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:03.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:04.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:04.751\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:06.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:06.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:06.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:07.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:07.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:08.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:08.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - 
"\u001b[32m2025-01-28 14:09:08.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:10.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 100\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:10.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:10.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:12.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:12.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:12.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:14.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:14.125\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:14.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:15.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 2\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:15.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:15.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:17.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:17.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:17.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:20.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"Yes\"\n", - 
"}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:20.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:20.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:21.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:21.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:21.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:23.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:23.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:23.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:24.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:24.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:26.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:26.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:26.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:27.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:30.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:30.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:30.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:31.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 1024\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:31.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:33.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:33.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:33.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:34.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:34.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:36.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 4000\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:36.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:36.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:38.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 0.1\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:38.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:38.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:40.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:40.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:40.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:42.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:42.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:44.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:44.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:45.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:45.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:45.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:47.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:47.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the 
final round?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:49.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": 25\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:49.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:49.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:51.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:51.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:51.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:52.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:52.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the 
reserve?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:55.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 2\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:55.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:55.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:56.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:56.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:58.853\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:58.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:12:58.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:02.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:02.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:03.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:03.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:03.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:05.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:05.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:05.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:07.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:07.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:09.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:09.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:10.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:10.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:12.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:12.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:12.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:14.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:14.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:14.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:16.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:16.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:16.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:17.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:17.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:17.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:17.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:19.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:19.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:19.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:20.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:20.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:20.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:22.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:22.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:22.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:24.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:24.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In how many regions do you need to 
be present to win the game?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:25.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 7\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:25.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:25.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:27.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:27.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:27.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:32.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:32.496\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:34.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:34.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:36.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 1\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:36.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:36.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:38.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:38.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:38.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:38.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:40.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:40.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:41.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:41.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:41.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:43.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:43.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:46.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:46.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:46.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:47.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:47.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:47.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:50.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:50.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:54.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:54.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:54.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:56.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 1\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:56.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you 
need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:01.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:01.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:01.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:02.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 4\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:02.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:02.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:04.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:04.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - 
"\u001b[32m2025-01-28 14:17:04.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:06.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:06.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:06.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:11.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:11.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:11.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:14.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:14.583\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:16.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:16.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:16.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:18.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:18.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:18.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:22.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - 
\u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:22.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:22.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:27.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:27.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:32.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:32.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:34.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:34.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:34.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:34.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:36.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:36.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:39.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:39.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 
14:18:39.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:40.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:40.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:40.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:42.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:42.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:45.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:45.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:45.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:47.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:47.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:47.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:49.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 25\n", - "}\u001b[0m\n", - 
"\u001b[32m2025-01-28 14:18:49.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:50.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:50.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:50.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:52.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"2015\"\n", - "}\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "\n", - "import pandas as pd\n", - "\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for section_name, section_data in data.groupby(\"section\"):\n", - " section_file = Path(\n", - " f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", - "\n", - " answers, sections = 
process_section_questions(section_file, section_data, model)\n", - "\n", - " for index in section_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into 'structured-qa'...\n", + "remote: Enumerating objects: 724, done.\u001b[K\n", + "remote: Counting objects: 100% (162/162), done.\u001b[K\n", + "remote: Compressing objects: 100% (101/101), done.\u001b[K\n", + "remote: Total 724 (delta 100), reused 74 (delta 61), pack-reused 562 (from 1)\u001b[K\n", + "Receiving objects: 100% (724/724), 2.23 MiB | 6.64 MiB/s, done.\n", + "Resolving deltas: 100% (382/382), done.\n" + ] + } + ], + "source": [ + "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "S22kTrfPlyhU", + "outputId": "40005814-676a-4abd-8d00-3b86d1e3a3f0" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "a9eMkW1-lyha" - }, - "source": [ - "# Results" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing ./structured-qa\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.27.1)\n", + "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (2.10.5)\n", + "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.0.17)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (6.0.2)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (1.41.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev71+gae325d3) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.67.1)\n", + 
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (2.27.2)\n", + "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev71+gae325d3) (1.25.2)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.25.5)\n", + "Requirement already satisfied: pyarrow>=7.0 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.10.2)\n", + "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.0.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.44)\n", + "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.9.1)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.23.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.0.12)\n", + "Requirement already satisfied: 
python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.2)\n", + "Requirement 
already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (24.3.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.36.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.17.0)\n", + "Building wheels for collected packages: structured-qa\n", + " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev71+gae325d3-py3-none-any.whl size=16241 sha256=2defc7ec99afa5814e6713aee2aca5a6364f8b2a2b97e206aa16ccabf91113eb\n", + " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", + "Successfully built structured-qa\n", + "Installing collected packages: structured-qa\n", + " Attempting uninstall: structured-qa\n", + " Found existing installation: structured-qa 0.3.3.dev71+gae325d3\n", + " Uninstalling structured-qa-0.3.3.dev71+gae325d3:\n", + " Successfully uninstalled structured-qa-0.3.3.dev71+gae325d3\n", + "Successfully installed structured-qa-0.3.3.dev71+gae325d3\n" + ] + } + ], + "source": [ + "%pip install ./structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "from google.colab.userdata import get, SecretNotFoundError\n", + "\n", + "try:\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + "except SecretNotFoundError as e:\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "jWlaKC5qXZrh" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process all questions for a single Section" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + "import json\n", + "import time\n", + "\n", + "\n", + "def process_section_questions(\n", + " section_file,\n", + " 
section_data,\n", + " model,\n", + "):\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in section_data.iterrows():\n", + " if model.n > 0 and model.n % 10 == 0:\n", + " logger.info(\"Waiting for 60 seconds\")\n", + " time.sleep(60)\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " response = model.model.generate_content([section_file.read_text(), question])\n", + " logger.info(response.text)\n", + " response_json = json.loads(response.text)\n", + " answers[index] = response_json[\"answer\"]\n", + " sections[index] = None\n", + " model.n += 1\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "6RoEbYj3XZri" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "whtSJwdrlyhZ" + }, + "outputs": [], + "source": [ + "SYSTEM_PROMPT = \"\"\"\n", + "You are given an input document and a question.\n", + "You can only answer the question based on the information in the document.\n", + "You will return a JSON name with one key: \"answer\".\n", + "In `\"answer\"`, you will return the answer in one of the following JSON contents:\n", + "- Yes/No (for boolean questions)\n", + "Is the model an LLM?\n", + "{\n", + " \"answer\": \"No\"\n", + "}\n", + "- Single number (for numeric questions)\n", + "How many layers does the model have?\n", + "{\n", + " \"answer\": 12\n", + "}\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", + "{\n", + " \"answer\": \"C\"\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "ObsvwlNslyhZ" + }, + "outputs": [], + "source": [ + "model = load_gemini_model(\n", + " \"gemini-2.0-flash-exp\",\n", + " system_prompt=SYSTEM_PROMPT,\n", + " generation_config={\n", + " \"response_mime_type\": \"application/json\",\n", + " },\n", + ")\n", + "model.n = 0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W97jWzzOlyhZ" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, + "id": "AZBwRnfjlyhZ", + "outputId": "e8e05071-0cad-41b5-c3f5-a0437357270d" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 112 - }, - "id": "EYYJgWf6lyha", - "outputId": "720ca56f-eb28-4770-9ec6-8959e77d27d9" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " Unnamed: 0 document \\\n", - "43 43 https://arxiv.org/pdf/2201.11903 \n", - "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", - "\n", - " section \\\n", - "43 3.4 Robustness of Chain of Thought \n", - "52 CARD AND TILE COSTS \n", - "\n", - " question answer pred_answer \\\n", - "43 How many annotators provided independent chain... 3 2 \n", - "52 Can a player pay coins to compensate for missi... YES NO \n", - "\n", - " pred_section \n", - "43 NaN \n", - "52 NaN " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
4343https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32NaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"results\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 43,\n \"max\": 52,\n \"num_unique_values\": 2,\n \"samples\": [\n 52,\n 43\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"CARD AND TILE COSTS\",\n \"3.4 Robustness of Chain of Thought\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\",\n \"How many annotators provided independent chains of thought?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"YES\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NO\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - 
"metadata": {}, - "execution_count": 14 - } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-01-28 14:06:32.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:32.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:32.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:34.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 175\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:34.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:36.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:36.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:36.597\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:38.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:38.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:38.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:40.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:40.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:41.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:41.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:43.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:43.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:44.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:44.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:44.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:46.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:46.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:46.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:48.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:48.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:48.515\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:50.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:50.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:50.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:52.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:52.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:52.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:53.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - 
\u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:53.671\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:53.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:55.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:55.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:56.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:56.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:58.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 14\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:58.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + 
"\u001b[32m2025-01-28 14:08:00.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:00.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:01.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:01.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:01.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:03.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:03.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:03.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:04.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:04.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:06.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:06.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:06.202\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:07.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:07.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:08.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:08.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:08.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:10.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 100\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:10.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:10.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How 
many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:12.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:12.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:12.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:14.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:14.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:14.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:15.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 2\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:15.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:15.829\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:17.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:17.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:17.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:20.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:20.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:20.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:21.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:21.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:21.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:23.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:23.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:23.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:24.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:24.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:26.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:26.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:26.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:27.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:30.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:30.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:30.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:31.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 1024\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:31.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:33.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:33.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:33.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:34.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:34.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:36.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 4000\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:36.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:36.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:38.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 0.1\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:38.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:38.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:40.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:40.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:40.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:42.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:42.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:44.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:44.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:45.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:45.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:45.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:47.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:47.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the 
final round?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:49.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:49.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:49.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:51.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:51.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:51.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:52.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:52.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the 
reserve?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:55.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 2\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:55.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:55.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:56.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:56.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:58.853\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:58.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:12:58.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:02.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:02.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:03.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:03.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:03.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:05.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:05.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:05.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:07.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:07.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:09.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:09.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:10.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:10.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:12.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:12.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:12.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:14.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:14.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:14.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:16.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:16.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:16.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:17.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:17.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:17.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:17.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:19.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:19.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:19.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:20.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:20.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:20.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:22.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:22.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:22.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:24.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:24.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In how many regions do you need to 
be present to win the game?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:25.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 7\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:25.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:25.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:27.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:27.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:27.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:32.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:32.496\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:34.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:34.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:36.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 1\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:36.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:36.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:38.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:38.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:38.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:38.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:40.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:40.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:41.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:41.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:41.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:43.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:43.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:46.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:46.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:46.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:47.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:47.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:47.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:50.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:50.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:54.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:54.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:54.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:56.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 1\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:56.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you 
need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:01.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:01.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:01.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:02.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 4\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:02.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:02.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:04.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:04.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + 
"\u001b[32m2025-01-28 14:17:04.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:06.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:06.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:06.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:11.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:11.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:11.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:14.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:14.583\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:16.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:16.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:16.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:18.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:18.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:18.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:22.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - 
\u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:22.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:22.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:27.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:27.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:32.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:32.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:34.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:34.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:34.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:34.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:36.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:36.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:39.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:39.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 
14:18:39.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:40.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:40.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:40.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:42.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:42.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:45.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:45.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:45.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:47.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:47.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:47.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:49.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + 
"\u001b[32m2025-01-28 14:18:49.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:50.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:50.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:50.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:52.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"2015\"\n", + "}\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for section_name, section_data in data.groupby(\"section\"):\n", + " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", + "\n", + " answers, sections = 
process_section_questions(section_file, section_data, model)\n", + "\n", + " for index in section_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 }, + "id": "EYYJgWf6lyha", + "outputId": "720ca56f-eb28-4770-9ec6-8959e77d27d9" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "wfz1XQDLlyha", - "outputId": "6417fafc-ffd5-46eb-ab92-8a2a905c4f71" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 43,\n \"max\": 52,\n \"num_unique_values\": 2,\n \"samples\": [\n 52,\n 43\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"CARD AND TILE COSTS\",\n \"3.4 Robustness of Chain of Thought\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Can a player pay coins to compensate for missing skill symbols in 
a Landmark Tile?\",\n \"How many annotators provided independent chains of thought?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"YES\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NO\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0.9797979797979798" - ] - }, - "metadata": {}, - "execution_count": 15 - } + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
4343https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32NaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" + "text/plain": [ + " Unnamed: 0 document \\\n", + "43 43 https://arxiv.org/pdf/2201.11903 \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "\n", + " section \\\n", + "43 3.4 Robustness of Chain of Thought \n", + "52 CARD AND TILE COSTS \n", + "\n", + " question answer pred_answer \\\n", + "43 How many annotators provided independent chain... 3 2 \n", + "52 Can a player pay coins to compensate for missi... YES NO \n", + "\n", + " pred_section \n", + "43 NaN \n", + "52 NaN " ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rjMNQp8-sZn9" - }, - "outputs": [], - "source": [] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" } - ], - "metadata": { + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python", - "version": "3.10.12" + "id": "wfz1XQDLlyha", + "outputId": "6417fafc-ffd5-46eb-ab92-8a2a905c4f71" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9797979797979798" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rjMNQp8-sZn9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 -} \ 
No newline at end of file + "language_info": { + "name": "python", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/pyproject.toml b/pyproject.toml index ef10196..b77ed97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "pydantic", "pymupdf4llm", "pyyaml", + "rapidfuzz", "streamlit", ] diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index af76f9f..a8f35e1 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -1,6 +1,14 @@ from pathlib import Path from loguru import logger +from rapidfuzz import process + + +def get_matching_section(response, section_names): + """ + Use string similarity to find the most similar section_name. + """ + return process.extractOne(response, section_names)[0] def find_retrieve_answer( @@ -76,15 +84,13 @@ def find_retrieve_answer( if finding_section: response = response.strip() - logger.debug(f"Retrieving section: {response}") - if response in sections_names: - section_content = (sections_dir / f"{response}.txt").read_text() - current_section = response - current_info = section_content - sections_checked.append(response) - else: - logger.error(f"Unknown section: {response}") - return "Unknown section", sections_checked + section_name = get_matching_section(response, sections_names) + logger.debug(f"Retrieving section: {section_name}") + section_content = (sections_dir / f"{section_name}.txt").read_text() + current_section = response + current_info = section_content + sections_checked.append(response) + else: if "MORE INFO" in response.upper(): current_info = None From 215226e9a0f9236b6998fd9f4eb315b838346773 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 29 Jan 2025 10:27:47 +0100 Subject: [PATCH 080/120] Use question_part --- src/structured_qa/workflow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index a8f35e1..3ab8c29 
100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -58,12 +58,13 @@ def find_retrieve_answer( if not current_info: logger.debug("Finding section") finding_section = True + question_part, *options = question.split("?") messages = [ { "role": "system", "content": find_prompt.format(SECTIONS="\n".join(sections_names)), }, - {"role": "user", "content": question}, + {"role": "user", "content": question_part}, ] else: logger.debug("Answering question") From 5d4d9610db4fba3724f2279f0549b188b7630fff Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 29 Jan 2025 11:00:28 +0100 Subject: [PATCH 081/120] Fix --- src/structured_qa/workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 3ab8c29..a461790 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -88,9 +88,9 @@ def find_retrieve_answer( section_name = get_matching_section(response, sections_names) logger.debug(f"Retrieving section: {section_name}") section_content = (sections_dir / f"{section_name}.txt").read_text() - current_section = response + current_section = section_name current_info = section_content - sections_checked.append(response) + sections_checked.append(section_name) else: if "MORE INFO" in response.upper(): From 1223b03f178bc89038c4bc611ce482bfdb5f0ebc Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 29 Jan 2025 12:35:21 +0100 Subject: [PATCH 082/120] break when no section_names --- src/structured_qa/workflow.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index a461790..a51c225 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -53,7 +53,7 @@ def find_retrieve_answer( current_section = None sections_checked = [] - while True: + while sections_names: logger.debug(f"Current information available: {current_info}") if not current_info: 
logger.debug("Finding section") @@ -85,6 +85,8 @@ def find_retrieve_answer( if finding_section: response = response.strip() + if not sections_names: + return "NOT FOUND", sections_checked section_name = get_matching_section(response, sections_names) logger.debug(f"Retrieving section: {section_name}") section_content = (sections_dir / f"{section_name}.txt").read_text() @@ -99,3 +101,5 @@ def find_retrieve_answer( continue else: return response, sections_checked + + return "NOT FOUND", sections_checked From 08c0b85b7fd0bbf8a94efb0a0464e8acbd4a6cc6 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 29 Jan 2025 13:49:20 +0100 Subject: [PATCH 083/120] Update prompt --- benchmark/gemini_find_retrieve_answer.ipynb | 24 ++++++++++----------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/benchmark/gemini_find_retrieve_answer.ipynb b/benchmark/gemini_find_retrieve_answer.ipynb index d1a2fba..23fd99d 100644 --- a/benchmark/gemini_find_retrieve_answer.ipynb +++ b/benchmark/gemini_find_retrieve_answer.ipynb @@ -100,8 +100,6 @@ "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", "You are a rigorous assistant answering questions.\n", "You only answer based on the current information available.\n", - "You should only answer with ANSWER_TYPE.\n", - "\n", "The current information available is:\n", "\n", "```\n", @@ -114,6 +112,17 @@ "```\n", "I need more info.\n", "```\n", + "\n", + "The answer must be in one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "Question: Is the model an LLM?\n", + "Answer: YES\n", + "- Number (for numeric questions)\n", + "Question: How many layers does the model have?\n", + "Answer: 12\n", + "- Single letter (for multiple-choice questions)\n", + "Question: What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", + "Answer: C\n", "\"\"\"\n", "\n", "\n", @@ -134,17 +143,6 @@ " sections = {}\n", " for index, row in document_data.iterrows():\n", " question = row[\"question\"]\n", - " try:\n", - " float(row[\"answer\"])\n", - " answer_type = \"a number\"\n", - " except ValueError:\n", - " if row[\"answer\"] in (\"YES\", \"NO\"):\n", - " answer_type = \"YES or NO\"\n", - " else:\n", - " answer_type = \"a single letter\"\n", - "\n", - " answer_prompt = answer_prompt.replace(\"ANSWER_TYPE\", answer_type)\n", - "\n", " logger.info(f\"Question: {question}\")\n", " answer, sections_checked = find_retrieve_answer(\n", " question, model, sections_dir, find_prompt, answer_prompt\n", From 7b9c96cd5fc3cd34781aa26e2519a6f4731feedc Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 29 Jan 2025 13:52:08 +0100 Subject: [PATCH 084/120] Add qwen perfect context --- benchmark/qwen-2.5-7B_perfect_context.ipynb | 1191 +++++++++++++++++++ 1 file changed, 1191 insertions(+) create mode 100644 benchmark/qwen-2.5-7B_perfect_context.ipynb diff --git a/benchmark/qwen-2.5-7B_perfect_context.ipynb b/benchmark/qwen-2.5-7B_perfect_context.ipynb new file mode 100644 index 0000000..31885b0 --- /dev/null +++ b/benchmark/qwen-2.5-7B_perfect_context.ipynb @@ -0,0 +1,1191 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oD2lVadPlyhR" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JZeb4ABvlyhS" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77aI1i7vlyhS" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "atlPXFshlyhS" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"QrgOGtuGlyhT", + "outputId": "e5fcc547-6580-4068-d844-28afc1c28d9c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into 'structured-qa'...\n", + "remote: Enumerating objects: 724, done.\u001b[K\n", + "remote: Counting objects: 100% (162/162), done.\u001b[K\n", + "remote: Compressing objects: 100% (101/101), done.\u001b[K\n", + "remote: Total 724 (delta 100), reused 74 (delta 61), pack-reused 562 (from 1)\u001b[K\n", + "Receiving objects: 100% (724/724), 2.23 MiB | 6.64 MiB/s, done.\n", + "Resolving deltas: 100% (382/382), done.\n" + ] + } + ], + "source": [ + "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S22kTrfPlyhU", + "outputId": "40005814-676a-4abd-8d00-3b86d1e3a3f0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing ./structured-qa\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.27.1)\n", + "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (2.10.5)\n", + "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.0.17)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (6.0.2)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (1.41.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev71+gae325d3) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.67.1)\n", + 
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (2.27.2)\n", + "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev71+gae325d3) (1.25.2)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.25.5)\n", + "Requirement already satisfied: pyarrow>=7.0 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.10.2)\n", + "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.0.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.44)\n", + "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.9.1)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.23.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.0.12)\n", + "Requirement already satisfied: 
python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.2)\n", + "Requirement 
already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (24.3.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.36.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.17.0)\n", + "Building wheels for collected packages: structured-qa\n", + " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev71+gae325d3-py3-none-any.whl size=16241 sha256=2defc7ec99afa5814e6713aee2aca5a6364f8b2a2b97e206aa16ccabf91113eb\n", + " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", + "Successfully built structured-qa\n", + "Installing collected packages: structured-qa\n", + " Attempting uninstall: structured-qa\n", + " Found existing installation: structured-qa 0.3.3.dev71+gae325d3\n", + " Uninstalling structured-qa-0.3.3.dev71+gae325d3:\n", + " Successfully uninstalled structured-qa-0.3.3.dev71+gae325d3\n", + "Successfully installed structured-qa-0.3.3.dev71+gae325d3\n" + ] + } + ], + "source": [ + "%pip install ./structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "jWlaKC5qXZrh" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process all questions for a single Section" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "\n", + "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You only answer based on the current information available.\n", + "The current information available is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "If the current information available not enough to answer the question,\n", + "you must return the following message and nothing 
else:\n", + "\n", + "```\n", + "I need more info.\n", + "```\n", + "\n", + "The answer must be in one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "Question: Is the model an LLM?\n", + "Answer: YES\n", + "- Number (for numeric questions)\n", + "Question: How many layers does the model have?\n", + "Answer: 12\n", + "- Single letter (for multiple-choice questions)\n", + "Question: What is the activation function used in the model? -A: ReLU -B: Sigmoid -C: Tanh\n", + "Answer: C\n", + "\"\"\"\n", + "\n", + "\n", + "def process_section_questions(\n", + " section_file,\n", + " section_data,\n", + " model,\n", + "):\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in section_data.iterrows():\n", + " if model.n > 0 and model.n % 10 == 0:\n", + " logger.info(\"Waiting for 60 seconds\")\n", + " time.sleep(60)\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " messages = [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": ANSWER_WITH_TYPE_PROMPT.format(\n", + " CURRENT_INFO=section_file.read_text()\n", + " ),\n", + " },\n", + " {\"role\": \"user\", \"content\": question},\n", + " ]\n", + " response = model.get_response(messages)\n", + " answers[index] = response\n", + " sections[index] = None\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "6RoEbYj3XZri" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_llama_cpp_model" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "ObsvwlNslyhZ" + }, + "outputs": [], + "source": [ + "model = load_llama_cpp_model(\n", + " \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"id": "W97jWzzOlyhZ" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "AZBwRnfjlyhZ", + "outputId": "e8e05071-0cad-41b5-c3f5-a0437357270d" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-01-28 14:06:32.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:32.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:32.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:34.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 175\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:34.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:36.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:36.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 
14:06:36.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:38.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:38.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:38.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:40.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:40.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:41.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:41.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:43.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:43.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:44.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:44.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:44.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:46.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:46.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:46.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:48.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:48.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:48.515\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:50.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:50.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:50.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:52.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:52.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:52.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:53.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - 
\u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:53.671\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:53.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:55.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:55.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:56.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:56.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:58.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 14\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:58.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + 
"\u001b[32m2025-01-28 14:08:00.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:00.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:01.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:01.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:01.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:03.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:03.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:03.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:04.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:04.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:06.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:06.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:06.202\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:07.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:07.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:08.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:08.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:08.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:10.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 100\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:10.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:10.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How 
many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:12.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:12.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:12.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:14.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:14.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:14.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:15.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 2\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:15.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:15.829\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:17.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:17.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:17.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:20.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:20.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:20.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:21.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:21.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:21.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:23.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:23.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:23.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:24.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:24.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:26.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:26.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:26.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:27.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:30.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:30.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:30.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:31.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 1024\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:31.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:33.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:33.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:33.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:34.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:34.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:36.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 4000\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:36.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:36.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:38.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 0.1\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:38.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:38.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:40.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:40.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:40.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:42.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:42.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:44.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:44.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:45.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:45.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:45.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:47.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:47.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the 
final round?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:49.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:49.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:49.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:51.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:51.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:51.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:52.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:52.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the 
reserve?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:55.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 2\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:55.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:55.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:56.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:56.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:58.853\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:58.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:12:58.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:02.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:02.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:03.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:03.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:03.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:05.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:05.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:05.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:07.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:07.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:09.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:09.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:10.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:10.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:12.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:12.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:12.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:14.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:14.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:14.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:16.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:16.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:16.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:17.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:17.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:13:17.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:17.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:19.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:19.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:19.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:20.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:20.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:20.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:22.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:22.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:22.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:24.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:24.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In how many regions do you need to 
be present to win the game?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:25.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 7\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:25.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:25.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:27.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:27.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:27.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:32.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:32.496\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:34.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:34.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:36.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 1\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:36.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:36.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:38.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:38.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:38.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:38.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:40.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:40.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:41.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:41.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:41.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:43.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:43.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:46.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:46.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:46.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:47.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:47.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:47.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:50.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:50.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:54.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:54.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:54.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:56.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 1\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:56.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you 
need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:01.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:01.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:01.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:02.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 4\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:02.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:02.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:04.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:04.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + 
"\u001b[32m2025-01-28 14:17:04.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:06.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:06.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:06.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:11.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:11.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:11.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:14.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:14.583\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:16.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:16.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:16.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:18.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:18.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:18.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:22.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - 
\u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:22.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:22.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:27.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:27.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:32.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:32.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:34.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:34.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:34.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:34.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:36.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:36.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:39.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:39.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 
14:18:39.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:40.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:40.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:40.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:42.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:42.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:45.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:45.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:45.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:47.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:47.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:47.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:49.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + 
"\u001b[32m2025-01-28 14:18:49.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:50.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:50.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:50.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:52.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"2015\"\n", + "}\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for section_name, section_data in data.groupby(\"section\"):\n", + " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", + "\n", + " answers, sections = 
process_section_questions(section_file, section_data, model)\n", + "\n", + " for index in section_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "id": "EYYJgWf6lyha", + "outputId": "720ca56f-eb28-4770-9ec6-8959e77d27d9" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 43,\n \"max\": 52,\n \"num_unique_values\": 2,\n \"samples\": [\n 52,\n 43\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"CARD AND TILE COSTS\",\n \"3.4 Robustness of Chain of Thought\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\",\n \"How many annotators provided independent chains of thought?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n 
\"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"YES\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NO\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
4343https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32NaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Unnamed: 0 document \\\n", + "43 43 https://arxiv.org/pdf/2201.11903 \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "\n", + " section \\\n", + "43 3.4 Robustness of Chain of Thought \n", + "52 CARD AND TILE COSTS \n", + "\n", + " question answer pred_answer \\\n", + "43 How many annotators provided independent chain... 3 2 \n", + "52 Can a player pay coins to compensate for missi... YES NO \n", + "\n", + " pred_section \n", + "43 NaN \n", + "52 NaN " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wfz1XQDLlyha", + "outputId": "6417fafc-ffd5-46eb-ab92-8a2a905c4f71" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9797979797979798" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rjMNQp8-sZn9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From c056bdc959cdc29e67611e5d420ee9861651cd7c Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 30 Jan 2025 10:39:00 +0100 Subject: [PATCH 085/120] Update gemini_find_retrieve_answer --- benchmark/gemini_find_retrieve_answer.ipynb | 1524 +++++++++++++++---- 1 file changed, 1269 insertions(+), 255 deletions(-) diff --git 
a/benchmark/gemini_find_retrieve_answer.ipynb b/benchmark/gemini_find_retrieve_answer.ipynb index 23fd99d..0e6dcdc 100644 --- a/benchmark/gemini_find_retrieve_answer.ipynb +++ b/benchmark/gemini_find_retrieve_answer.ipynb @@ -1,257 +1,1271 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Structured Q&A" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "9RKWbX7BHEgr" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PYuloevCHEgu" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pgYAsUQWHEgv" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EbFAX4heHEgv" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "2HoyF-xbHEgv", + "outputId": "c67c89e2-0140-432a-c741-a2546685176b", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-zpym8juf\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-zpym8juf\n", + " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", + " Switched to a new branch '5-add-benchmark'\n", + " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 7b9c96cd5fc3cd34781aa26e2519a6f4731feedc\n", + " Installing build dependencies ... 
\u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.7.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.27.1)\n", + "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.7.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (2.10.6)\n", + "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.0.17)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (6.0.2)\n", + "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (3.11.0)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (1.41.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev84+g7b9c96c) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (24.2)\n", + "Requirement already satisfied: 
requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev84+g7b9c96c) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev84+g7b9c96c) (2.27.2)\n", + "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev84+g7b9c96c) (1.25.2)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.10.2)\n", + "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (6.0.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.1.44)\n", + "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.9.1)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (6.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in 
/usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.23.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in 
/usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (24.3.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.36.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.17.0)\n" + ] + } + ], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "p_hsSGafHEgw", + "outputId": "b78ec720-0315-48fe-9d15-dc3400ca69d7", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2025-01-29 13:54:23-- 
https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 21734 (21K) [text/plain]\n", + "Saving to: ‘structured_qa.csv.3’\n", + "\n", + "\rstructured_qa.csv.3 0%[ ] 0 --.-KB/s \rstructured_qa.csv.3 100%[===================>] 21.22K --.-KB/s in 0.002s \n", + "\n", + "2025-01-29 13:54:24 (9.19 MB/s) - ‘structured_qa.csv.3’ saved [21734/21734]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0MDfM6cyHEgx" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "5bLJE4U7HEgx" + }, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "from google.colab.userdata import get, SecretNotFoundError\n", + "\n", + "try:\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + "except SecretNotFoundError as e:\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "y3yUsRDWHEgy" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AgpODLeJHEgy" + }, + "source": [ + "## Function to Process a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "n6d8F7cYHEgy" + }, + "outputs": [], + "source": [ + "from structured_qa.config import FIND_PROMPT\n", + "from 
structured_qa.preprocessing import document_to_sections_dir\n", + "from structured_qa.workflow import find_retrieve_answer\n", + "\n", + "\n", + "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You only answer based on the current information available.\n", + "The current information available is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "The answer must be in one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "Is the model an LLM?\n", + "YES\n", + "- Number (for numeric questions)\n", + "How many layers does the model have?\n", + "12\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model? -A: ReLU -B: Sigmoid -C: Tanh\n", + "C\n", + "\"\"\"\n", + "\n", + "\n", + "def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + " find_prompt: str = FIND_PROMPT,\n", + " answer_prompt: str = ANSWER_WITH_TYPE_PROMPT,\n", + "):\n", + " sections_dir = Path(\"sections\") / Path(document_file).stem\n", + " if not sections_dir.exists():\n", + " logger.info(\"Splitting document into sections\")\n", + " document_to_sections_dir(document_file, sections_dir)\n", + "\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " answer, sections_checked = find_retrieve_answer(\n", + " question, model, sections_dir, find_prompt, answer_prompt\n", + " )\n", + " logger.info(f\"Answer: {answer}\")\n", + " answers[index] = answer\n", + " sections[index] = sections_checked[-1] if sections_checked else None\n", + "\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GdlWjANdHEgz" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": 
"9zx8nCaZHEgz" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "U4R84hHRHEgz" + }, + "outputs": [], + "source": [ + "model = load_gemini_model(\"gemini-2.0-flash-exp\", system_prompt=None)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BEzqJJ1yHEgz" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "-qtPf9RmHEgz", + "outputId": "bc1ab1b7-e8d2-4fb0-981b-883e7eeaa794", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[32m2025-01-29 13:54:26.672\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:26.682\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:26.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:26.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:26.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. 
-C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:27.691\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 988.13ms\n", + "\u001b[32m2025-01-29 13:54:27.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:27.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 540.84ms\n", + "\u001b[32m2025-01-29 13:54:28.263\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:28.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:28.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:28.870\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 572.93ms\n", + "\u001b[32m2025-01-29 13:54:28.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:28.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:29.418\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 521.13ms\n", + "\u001b[32m2025-01-29 13:54:29.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:29.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 653.13ms\n", + "\u001b[32m2025-01-29 13:54:30.095\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:30.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:30.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:30.948\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 829.04ms\n", + "\u001b[32m2025-01-29 13:54:30.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:30.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:31.576\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 615.85ms\n", + "\u001b[32m2025-01-29 13:54:31.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:31.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. 
-C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:32.208\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 613.00ms\n", + "\u001b[32m2025-01-29 13:54:32.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:32.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:32.858\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 626.48ms\n", + "\u001b[32m2025-01-29 13:54:32.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:32.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:33.431\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 554.99ms\n", + "\u001b[32m2025-01-29 13:54:33.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:33.466\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:33.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 1706.03762.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:33.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:33.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:34.056\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 568.57ms\n", + "\u001b[32m2025-01-29 13:54:34.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:34.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 689.37ms\n", + "\u001b[32m2025-01-29 13:54:34.770\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:34.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:34.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:35.397\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 613.50ms\n", + "\u001b[32m2025-01-29 13:54:35.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:35.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 582.09ms\n", + "\u001b[32m2025-01-29 13:54:35.992\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST 
https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:35.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:35.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.49ms\n", + "\u001b[32m2025-01-29 13:54:36.611\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:36.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:36.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:37.234\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 610.46ms\n", + "\u001b[32m2025-01-29 13:54:37.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:37.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.15ms\n", + "\u001b[32m2025-01-29 13:54:37.831\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST 
https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:37.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:37.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 609.22ms\n", + "\u001b[32m2025-01-29 13:54:38.452\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:38.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:38.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.94ms\n", + "\u001b[32m2025-01-29 13:54:39.100\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:39.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:39.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 661.72ms\n", + "\u001b[32m2025-01-29 13:54:39.770\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:39.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:39.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:40.339\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 558.40ms\n", + "\u001b[32m2025-01-29 13:54:40.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:40.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:40.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2106.09685.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:40.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:40.359\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:41.023\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.57ms\n", + "\u001b[32m2025-01-29 13:54:41.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:41.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:41.666\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 635.63ms\n", + "\u001b[32m2025-01-29 13:54:41.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:41.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.74ms\n", + "\u001b[32m2025-01-29 13:54:42.310\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.904\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 584.31ms\n", + "\u001b[32m2025-01-29 13:54:42.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2201.11903.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.920\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.44ms\n", + "\u001b[32m2025-01-29 13:54:43.588\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:43.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:43.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 481.89ms\n", + "\u001b[32m2025-01-29 13:54:44.083\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:44.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:44.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 660.08ms\n", + "\u001b[32m2025-01-29 13:54:44.754\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:44.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:44.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 557.92ms\n", + "\u001b[32m2025-01-29 13:54:45.329\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:45.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:45.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 636.75ms\n", + "\u001b[32m2025-01-29 13:54:45.976\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:45.978\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:45.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 608.68ms\n", + "\u001b[32m2025-01-29 13:54:46.600\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:46.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:46.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.296\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 686.39ms\n", + "\u001b[32m2025-01-29 13:54:47.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.93ms\n", + "\u001b[32m2025-01-29 13:54:47.915\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST 
https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2210.05189.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 635.76ms\n", + "\u001b[32m2025-01-29 13:54:48.574\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:48.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:48.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 582.96ms\n", + "\u001b[32m2025-01-29 13:54:49.168\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:49.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:49.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:49.864\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 685.88ms\n", + "\u001b[32m2025-01-29 13:54:49.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:49.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 559.12ms\n", + "\u001b[32m2025-01-29 13:54:50.436\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:50.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:50.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.69ms\n", + "\u001b[32m2025-01-29 13:54:51.056\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 608.10ms\n", + "\u001b[32m2025-01-29 13:54:51.676\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:52.359\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.73ms\n", + "\u001b[32m2025-01-29 13:54:52.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:52.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:52.956\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:52.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:52.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 588.01ms\n", + "\u001b[32m2025-01-29 13:54:53.555\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:53.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 585.27ms\n", + "\u001b[32m2025-01-29 13:54:53.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:54.227\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.84ms\n", + "\u001b[32m2025-01-29 13:54:54.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:54.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 633.02ms\n", + "\u001b[32m2025-01-29 13:54:54.872\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:54.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:54.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 557.19ms\n", + "\u001b[32m2025-01-29 13:54:55.438\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:55.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:55.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:55.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:55.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:55.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 633.98ms\n", + "\u001b[32m2025-01-29 13:54:56.098\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:56.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:56.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 584.96ms\n", + "\u001b[32m2025-01-29 13:54:56.697\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:56.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:56.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 710.18ms\n", + "\u001b[32m2025-01-29 13:54:57.422\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:57.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:57.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.42ms\n", + "\u001b[32m2025-01-29 13:54:58.019\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile CUDA_C_Programming_Guide.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.028\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.759\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 715.90ms\n", + "\u001b[32m2025-01-29 13:54:58.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 736.45ms\n", + "\u001b[32m2025-01-29 13:54:59.509\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:59.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:59.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 684.31ms\n", + "\u001b[32m2025-01-29 13:55:00.210\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:00.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:00.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.52ms\n", + "\u001b[32m2025-01-29 13:55:00.815\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:00.816\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:00.818\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:01.414\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:01.415\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 585.33ms\n", + "\u001b[32m2025-01-29 13:55:01.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. 
-C: It is deleted after use to save space.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.50ms\n", + "\u001b[32m2025-01-29 13:55:02.012\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:02.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:02.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:02.690\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:02.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 660.22ms\n", + "\u001b[32m2025-01-29 13:55:02.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 710.62ms\n", + "\u001b[32m2025-01-29 13:55:03.415\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:03.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:03.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. 
-C: Only when the graph is destroyed.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 710.13ms\n", + "\u001b[32m2025-01-29 13:55:04.140\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:04.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:04.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:04.819\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:04.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:04.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 673.64ms\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.60ms\n", + "\u001b[32m2025-01-29 13:55:05.500\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:05.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:05.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 684.66ms\n", + "\u001b[32m2025-01-29 13:55:06.206\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile ?uri=OJ:L_202401689.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.893\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 664.22ms\n", + "\u001b[32m2025-01-29 13:55:06.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 608.95ms\n", + "\u001b[32m2025-01-29 13:55:07.517\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:07.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:07.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. 
-C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.62ms\n", + "\u001b[32m2025-01-29 13:55:08.136\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:08.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:08.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 835.55ms\n", + "\u001b[32m2025-01-29 13:55:08.983\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:08.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:08.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:09.730\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 738.10ms\n", + "\u001b[32m2025-01-29 13:55:09.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:09.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. 
-B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:10.352\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 610.85ms\n", + "\u001b[32m2025-01-29 13:55:10.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:10.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 809.95ms\n", + "\u001b[32m2025-01-29 13:55:11.175\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:11.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:11.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? -A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.79ms\n", + "\u001b[32m2025-01-29 13:55:11.794\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:11.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:11.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? 
-A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:12.465\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:12.467\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.49ms\n", + "\u001b[32m2025-01-29 13:55:12.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? -A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 835.45ms\n", + "\u001b[32m2025-01-29 13:55:13.310\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 7DUME_EN01_Rules.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.36ms\n", + "\u001b[32m2025-01-29 13:55:13.993\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.31ms\n", + "\u001b[32m2025-01-29 13:55:14.714\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:14.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:14.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 633.73ms\n", + "\u001b[32m2025-01-29 13:55:15.360\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:15.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:15.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:16.033\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.55ms\n", + "\u001b[32m2025-01-29 13:55:16.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:16.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:16.680\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:16.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:16.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 643.44ms\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 661.16ms\n", + "\u001b[32m2025-01-29 13:55:17.353\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:17.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:17.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 683.50ms\n", + "\u001b[32m2025-01-29 13:55:18.050\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:18.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:18.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.33ms\n", + "\u001b[32m2025-01-29 13:55:18.721\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:18.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:18.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 609.49ms\n", + "\u001b[32m2025-01-29 13:55:19.345\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:19.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:19.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.21ms\n", + "\u001b[32m2025-01-29 13:55:20.068\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:20.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:20.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 684.96ms\n", + "\u001b[32m2025-01-29 13:55:20.764\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:20.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:20.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.86ms\n", + "\u001b[32m2025-01-29 13:55:21.409\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:21.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:21.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.19ms\n", + "\u001b[32m2025-01-29 13:55:22.081\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:22.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:22.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.59ms\n", + "\u001b[32m2025-01-29 13:55:22.701\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:22.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:22.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.04ms\n", + "\u001b[32m2025-01-29 13:55:23.422\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:23.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:23.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:23.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile is_eotn_rulebook.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:23.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:23.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.39ms\n", + "\u001b[32m2025-01-29 13:55:24.159\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:24.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:24.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 614.42ms\n", + "\u001b[32m2025-01-29 13:55:24.783\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:24.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:24.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.58ms\n", + "\u001b[32m2025-01-29 13:55:25.454\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:25.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:25.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 860.42ms\n", + "\u001b[32m2025-01-29 13:55:26.327\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:26.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:26.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 683.94ms\n", + "\u001b[32m2025-01-29 13:55:27.024\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:27.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:27.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:27.696\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:27.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:27.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 669.67ms\n", + "\u001b[32m2025-01-29 13:55:28.420\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:28.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:28.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 721.91ms\n", + "\u001b[32m2025-01-29 13:55:29.116\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:29.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:29.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 687.25ms\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 683.55ms\n", + "\u001b[32m2025-01-29 13:55:29.813\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:29.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:29.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.97ms\n", + "\u001b[32m2025-01-29 13:55:30.434\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:30.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:30.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.87ms\n", + "\u001b[32m2025-01-29 13:55:31.077\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:31.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:31.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:31.734\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 644.82ms\n", + "\u001b[32m2025-01-29 13:55:31.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:31.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:32.540\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 763.87ms\n", + "\u001b[32m2025-01-29 13:55:32.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "3eW9TIKjHEgz", + "outputId": "186ae159-38de-4104-ca6d-c29d1172503f", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 document \\\n", + "0 0 https://arxiv.org/pdf/1706.03762 \n", + "1 1 https://arxiv.org/pdf/1706.03762 \n", + "2 2 https://arxiv.org/pdf/1706.03762 
\n", + "3 3 https://arxiv.org/pdf/1706.03762 \n", + "4 4 https://arxiv.org/pdf/1706.03762 \n", + ".. ... ... \n", + "94 94 https://aiindex.stanford.edu/wp-content/upload... \n", + "95 95 https://aiindex.stanford.edu/wp-content/upload... \n", + "96 96 https://aiindex.stanford.edu/wp-content/upload... \n", + "97 97 https://aiindex.stanford.edu/wp-content/upload... \n", + "98 98 https://aiindex.stanford.edu/wp-content/upload... \n", + "\n", + " section \\\n", + "0 3 Model Architecture \n", + "1 3.1 Encoder and Decoder Stacks \n", + "2 3.1 Encoder and Decoder Stacks \n", + "3 3.2.2 Multi-Head Attention \n", + "4 3.4 Embeddings and Softmax \n", + ".. ... \n", + "94 LLM Tokenization Introduces Unfairness \n", + "95 U.S. Regulation \n", + "96 U.S. Regulation \n", + "97 Europe \n", + "98 Europe \n", + "\n", + " question answer \\\n", + "0 What type of architecture does the model use? ... C \n", + "1 How many layers compose the encoder? 6 \n", + "2 How many layers compose the decoder? 6 \n", + "3 How many parallel attention heads are used? 8 \n", + "4 Does the final model use learned embeddings fo... YES \n", + ".. ... ... \n", + "94 What are the three major inequalities resultin... B \n", + "95 How many AI-related regulations were enacted i... 25 \n", + "96 Which of the following was identified as a hig... B \n", + "97 Which country had the highest proportion of fe... B \n", + "98 Which countries reported the smallest proporti... C \n", + "\n", + " pred_answer pred_section \n", + "0 GENERATION ERROR NaN \n", + "1 GENERATION ERROR NaN \n", + "2 GENERATION ERROR NaN \n", + "3 GENERATION ERROR NaN \n", + "4 GENERATION ERROR NaN \n", + ".. ... ... \n", + "94 GENERATION ERROR NaN \n", + "95 GENERATION ERROR NaN \n", + "96 GENERATION ERROR NaN \n", + "97 GENERATION ERROR NaN \n", + "98 GENERATION ERROR NaN \n", + "\n", + "[99 rows x 7 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
00https://arxiv.org/pdf/1706.037623 Model ArchitectureWhat type of architecture does the model use? ...CGENERATION ERRORNaN
11https://arxiv.org/pdf/1706.037623.1 Encoder and Decoder StacksHow many layers compose the encoder?6GENERATION ERRORNaN
22https://arxiv.org/pdf/1706.037623.1 Encoder and Decoder StacksHow many layers compose the decoder?6GENERATION ERRORNaN
33https://arxiv.org/pdf/1706.037623.2.2 Multi-Head AttentionHow many parallel attention heads are used?8GENERATION ERRORNaN
44https://arxiv.org/pdf/1706.037623.4 Embeddings and SoftmaxDoes the final model use learned embeddings fo...YESGENERATION ERRORNaN
........................
9494https://aiindex.stanford.edu/wp-content/upload...LLM Tokenization Introduces UnfairnessWhat are the three major inequalities resultin...BGENERATION ERRORNaN
9595https://aiindex.stanford.edu/wp-content/upload...U.S. RegulationHow many AI-related regulations were enacted i...25GENERATION ERRORNaN
9696https://aiindex.stanford.edu/wp-content/upload...U.S. RegulationWhich of the following was identified as a hig...BGENERATION ERRORNaN
9797https://aiindex.stanford.edu/wp-content/upload...EuropeWhich country had the highest proportion of fe...BGENERATION ERRORNaN
9898https://aiindex.stanford.edu/wp-content/upload...EuropeWhich countries reported the smallest proporti...CGENERATION ERRORNaN
\n", + "

99 rows × 7 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"results\",\n \"rows\": 99,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28,\n \"min\": 0,\n \"max\": 98,\n \"num_unique_values\": 99,\n \"samples\": [\n 62,\n 40,\n 95\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 59,\n \"samples\": [\n \"3 Model Architecture\",\n \"5.2 Hardware and Schedule\",\n \"CARD AND TILE EFFECTS\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 98,\n \"samples\": [\n \"Can you raid the locations of a player that has passed during the action phase?\",\n \"Is symbolic reasoning usually simple for humans but challenging for language models?\",\n \"How many AI-related regulations were enacted in the United States in 2023?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 21,\n \"samples\": [\n \"C\",\n \"4\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"GENERATION ERROR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 
null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "AhenESELHEgz", + "outputId": "4b5d7785-4d17-4c78-b0f8-d69fa50bad15", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.0" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + }, + "colab": { + "provenance": [] + } }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": 
{}, - "outputs": [], - "source": [ - "import os\n", - "import google.generativeai as genai\n", - "from google.colab.userdata import get, SecretNotFoundError\n", - "\n", - "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", - "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from loguru import logger" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Function to Process a single Document" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from structured_qa.config import FIND_PROMPT\n", - "from structured_qa.preprocessing import document_to_sections_dir\n", - "from structured_qa.workflow import find_retrieve_answer\n", - "\n", - "\n", - "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", - "You are a rigorous assistant answering questions.\n", - "You only answer based on the current information available.\n", - "The current information available is:\n", - "\n", - "```\n", - "{CURRENT_INFO}\n", - "```\n", - "\n", - "If the current information available not enough to answer the question,\n", - "you must return the following message and nothing else:\n", - "\n", - "```\n", - "I need more info.\n", - "```\n", - "\n", - "The answer must be in one of the following formats:\n", - "- YES/NO (for boolean questions)\n", - "Question: Is the model an LLM?\n", - "Answer: YES\n", - "- Number (for numeric questions)\n", - "Question: How many layers does the model have?\n", - "Answer: 12\n", - "- Single letter (for multiple-choice questions)\n", - "Question: What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", - "Answer: C\n", - "\"\"\"\n", - "\n", - "\n", - "def process_document(\n", - " document_file,\n", - " document_data,\n", - " model,\n", - " find_prompt: str = FIND_PROMPT,\n", - " answer_prompt: str = ANSWER_WITH_TYPE_PROMPT,\n", - "):\n", - " sections_dir = Path(\"sections\") / Path(document_file).stem\n", - " if not sections_dir.exists():\n", - " logger.info(\"Splitting document into sections\")\n", - " document_to_sections_dir(document_file, sections_dir)\n", - "\n", - " logger.info(\"Predicting\")\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in document_data.iterrows():\n", - " question = row[\"question\"]\n", - " logger.info(f\"Question: {question}\")\n", - " answer, sections_checked = find_retrieve_answer(\n", - " question, model, sections_dir, find_prompt, answer_prompt\n", - " )\n", - "\n", - " answers[index] = answer\n", - " sections[index] = sections_checked[-1] if sections_checked else None\n", - "\n", - " return answers, sections" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_gemini_model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = load_gemini_model(\"gemini-2.0-flash-exp\", system_prompt=None)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Run Benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for 
document_link, document_data in data.groupby(\"document\"):\n", - " logger.info(f\"Downloading document {document_link}\")\n", - " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " if not Path(downloaded_document).exists():\n", - " urlretrieve(document_link, downloaded_document)\n", - " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", - " else:\n", - " logger.info(f\"File {downloaded_document} already exists\")\n", - "\n", - " answers, sections = process_document(downloaded_document, document_data, model)\n", - "\n", - " for index in document_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From b7264471a855f8067ba2d2f15ccac3082e441793 Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 30 Jan 2025 10:40:06 +0100 Subject: [PATCH 086/120] Update qwen perfect context --- benchmark/qwen-2.5-7B_perfect_context.ipynb | 1191 -------------- benchmark/qwen_2_5_7B_perfect_context.ipynb | 1613 +++++++++++++++++++ 2 files changed, 1613 insertions(+), 1191 deletions(-) delete mode 100644 benchmark/qwen-2.5-7B_perfect_context.ipynb create mode 100644 
benchmark/qwen_2_5_7B_perfect_context.ipynb diff --git a/benchmark/qwen-2.5-7B_perfect_context.ipynb b/benchmark/qwen-2.5-7B_perfect_context.ipynb deleted file mode 100644 index 31885b0..0000000 --- a/benchmark/qwen-2.5-7B_perfect_context.ipynb +++ /dev/null @@ -1,1191 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "oD2lVadPlyhR" - }, - "source": [ - "# Structured Q&A" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JZeb4ABvlyhS" - }, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "77aI1i7vlyhS" - }, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "atlPXFshlyhS" - }, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "QrgOGtuGlyhT", - "outputId": "e5fcc547-6580-4068-d844-28afc1c28d9c" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cloning into 'structured-qa'...\n", - "remote: Enumerating objects: 724, done.\u001b[K\n", - "remote: Counting objects: 100% (162/162), done.\u001b[K\n", - "remote: Compressing objects: 100% (101/101), done.\u001b[K\n", - "remote: Total 724 (delta 100), reused 74 (delta 61), pack-reused 562 (from 1)\u001b[K\n", - "Receiving objects: 100% (724/724), 2.23 MiB | 6.64 MiB/s, done.\n", - "Resolving deltas: 100% (382/382), done.\n" - ] - } - ], - "source": [ - "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "S22kTrfPlyhU", - "outputId": "40005814-676a-4abd-8d00-3b86d1e3a3f0" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"Processing ./structured-qa\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.27.1)\n", - "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.3)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (2.10.5)\n", - "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.0.17)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (6.0.2)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (1.41.1)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev71+gae325d3) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (2.27.2)\n", - "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev71+gae325d3) (1.25.2)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev71+gae325d3) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.25.5)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.10.2)\n", - "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.0.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.44)\n", - "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.9.1)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.3.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from 
altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.23.0)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from 
gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.0.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (24.3.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.36.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.17.0)\n", - "Building wheels for collected packages: structured-qa\n", - " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev71+gae325d3-py3-none-any.whl size=16241 sha256=2defc7ec99afa5814e6713aee2aca5a6364f8b2a2b97e206aa16ccabf91113eb\n", - " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", - "Successfully built structured-qa\n", - "Installing collected packages: structured-qa\n", - " Attempting uninstall: structured-qa\n", - " Found existing installation: structured-qa 0.3.3.dev71+gae325d3\n", - " Uninstalling structured-qa-0.3.3.dev71+gae325d3:\n", - " Successfully uninstalled structured-qa-0.3.3.dev71+gae325d3\n", - "Successfully installed structured-qa-0.3.3.dev71+gae325d3\n" - ] - } - ], - "source": [ - "%pip install ./structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qwHWJEsulyhV" - }, - "source": [ - "# Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "iJ812u2llyhV" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "jWlaKC5qXZrh" - }, - "outputs": [], - "source": [ - "from loguru import logger" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MKijHC_ClyhX" - }, - "source": [ - "## Function to Process all questions for a single Section" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "oFU-eYMVlyhX" - }, - "outputs": [], - "source": [ - "import time\n", - "\n", - "\n", - "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", - "You are a rigorous assistant answering questions.\n", - "You only answer based on the current information available.\n", - "The current information available is:\n", - "\n", - "```\n", - "{CURRENT_INFO}\n", - "```\n", - "\n", - "If the current information available not enough to answer the question,\n", - "you must return the following message and nothing 
else:\n", - "\n", - "```\n", - "I need more info.\n", - "```\n", - "\n", - "The answer must be in one of the following formats:\n", - "- YES/NO (for boolean questions)\n", - "Question: Is the model an LLM?\n", - "Answer: YES\n", - "- Number (for numeric questions)\n", - "Question: How many layers does the model have?\n", - "Answer: 12\n", - "- Single letter (for multiple-choice questions)\n", - "Question: What is the activation function used in the model? -A: ReLU -B: Sigmoid -C: Tanh\n", - "Answer: C\n", - "\"\"\"\n", - "\n", - "\n", - "def process_section_questions(\n", - " section_file,\n", - " section_data,\n", - " model,\n", - "):\n", - " logger.info(\"Predicting\")\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in section_data.iterrows():\n", - " if model.n > 0 and model.n % 10 == 0:\n", - " logger.info(\"Waiting for 60 seconds\")\n", - " time.sleep(60)\n", - " question = row[\"question\"]\n", - " logger.info(f\"Question: {question}\")\n", - " messages = [\n", - " {\n", - " \"role\": \"system\",\n", - " \"content\": ANSWER_WITH_TYPE_PROMPT.format(\n", - " CURRENT_INFO=section_file.read_text()\n", - " ),\n", - " },\n", - " {\"role\": \"user\", \"content\": question},\n", - " ]\n", - " response = model.get_response(messages)\n", - " answers[index] = response\n", - " sections[index] = None\n", - " return answers, sections" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VQAof5xtlyhY" - }, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "6RoEbYj3XZri" - }, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_llama_cpp_model" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "ObsvwlNslyhZ" - }, - "outputs": [], - "source": [ - "model = load_llama_cpp_model(\n", - " \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - 
"id": "W97jWzzOlyhZ" - }, - "source": [ - "# Run Benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "AZBwRnfjlyhZ", - "outputId": "e8e05071-0cad-41b5-c3f5-a0437357270d" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-01-28 14:06:32.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:32.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:32.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:34.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 175\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:34.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:36.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:36.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 
14:06:36.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:38.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:38.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:38.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:40.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:40.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:41.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:41.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:43.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:43.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:44.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:44.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:44.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:46.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:46.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:46.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:48.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:48.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:48.515\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:50.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:50.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:50.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:52.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:52.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:52.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:53.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - 
\u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:53.671\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:53.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:55.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:55.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:56.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:56.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:58.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 14\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:58.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - 
"\u001b[32m2025-01-28 14:08:00.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:00.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:01.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:01.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:01.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:03.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:03.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:03.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:04.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:04.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:06.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:06.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:06.202\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:07.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:07.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:08.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:08.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:08.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:10.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 100\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:10.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:10.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How 
many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:12.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:12.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:12.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:14.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:14.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:14.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:15.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 2\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:15.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:15.829\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:17.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:17.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:17.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:20.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:20.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:20.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:21.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:21.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:21.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:23.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:23.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:23.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:24.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:24.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:26.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:26.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:26.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:27.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:30.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:30.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:30.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:31.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 1024\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:31.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:33.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:33.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:33.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:34.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:34.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:36.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 4000\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:36.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:36.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:38.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 0.1\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:38.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:38.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:40.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:40.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:40.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:42.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:42.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:44.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:44.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:45.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:45.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:45.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:47.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:47.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the 
final round?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:49.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": 25\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:49.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:49.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:51.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:51.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:51.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:52.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:52.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the 
reserve?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:55.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 2\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:55.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:55.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:56.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:56.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:58.853\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:58.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:12:58.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:02.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:02.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:03.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:03.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:03.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:05.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:05.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:05.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:07.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:07.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:09.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:09.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:10.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:10.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:12.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:12.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:12.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:14.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:14.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:14.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:16.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:16.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:16.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:17.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:17.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:17.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:17.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:19.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:19.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:19.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:20.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:20.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:20.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:22.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:22.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:22.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:24.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:24.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In how many regions do you need to 
be present to win the game?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:25.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 7\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:25.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:25.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:27.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:27.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:27.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:32.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:32.496\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:34.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:34.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:36.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 1\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:36.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:36.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:38.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:38.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:38.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:38.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:40.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:40.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:41.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:41.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:41.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:43.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:43.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:46.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:46.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:46.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:47.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:47.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:47.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:50.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:50.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:54.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:54.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:54.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:56.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 1\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:56.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you 
need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:01.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:01.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:01.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:02.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 4\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:02.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:02.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:04.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:04.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - 
"\u001b[32m2025-01-28 14:17:04.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:06.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:06.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:06.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:11.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:11.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:11.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:14.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:14.583\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:16.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:16.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:16.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:18.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:18.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:18.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:22.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - 
\u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:22.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:22.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:27.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:27.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:32.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:32.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:34.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:34.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:34.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:34.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:36.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:36.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:39.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:39.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 
14:18:39.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:40.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:40.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:40.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:42.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:42.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:45.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:45.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:45.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:47.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:47.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:47.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:49.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 25\n", - "}\u001b[0m\n", - 
"\u001b[32m2025-01-28 14:18:49.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:50.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:50.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:50.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:52.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"2015\"\n", - "}\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "\n", - "import pandas as pd\n", - "\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for section_name, section_data in data.groupby(\"section\"):\n", - " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", - "\n", - " answers, sections = 
process_section_questions(section_file, section_data, model)\n", - "\n", - " for index in section_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a9eMkW1-lyha" - }, - "source": [ - "# Results" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 112 - }, - "id": "EYYJgWf6lyha", - "outputId": "720ca56f-eb28-4770-9ec6-8959e77d27d9" - }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 43,\n \"max\": 52,\n \"num_unique_values\": 2,\n \"samples\": [\n 52,\n 43\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"CARD AND TILE COSTS\",\n \"3.4 Robustness of Chain of Thought\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\",\n \"How many annotators provided independent chains of thought?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n 
\"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"YES\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NO\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" - }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
4343https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32NaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " Unnamed: 0 document \\\n", - "43 43 https://arxiv.org/pdf/2201.11903 \n", - "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", - "\n", - " section \\\n", - "43 3.4 Robustness of Chain of Thought \n", - "52 CARD AND TILE COSTS \n", - "\n", - " question answer pred_answer \\\n", - "43 How many annotators provided independent chain... 3 2 \n", - "52 Can a player pay coins to compensate for missi... YES NO \n", - "\n", - " pred_section \n", - "43 NaN \n", - "52 NaN " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "wfz1XQDLlyha", - "outputId": "6417fafc-ffd5-46eb-ab92-8a2a905c4f71" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9797979797979798" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rjMNQp8-sZn9" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/benchmark/qwen_2_5_7B_perfect_context.ipynb b/benchmark/qwen_2_5_7B_perfect_context.ipynb new file mode 100644 index 0000000..e18825d --- /dev/null +++ b/benchmark/qwen_2_5_7B_perfect_context.ipynb @@ -0,0 +1,1613 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oD2lVadPlyhR" + }, + "source": [ + "# Structured Q&A" + ] + 
}, + { + "cell_type": "markdown", + "metadata": { + "id": "JZeb4ABvlyhS" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77aI1i7vlyhS" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "atlPXFshlyhS" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QrgOGtuGlyhT", + "outputId": "340ae5a6-0710-40e8-baba-390c143061cb" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'structured-qa'...\n", + "remote: Enumerating objects: 795, done.\u001b[K\n", + "remote: Counting objects: 100% (233/233), done.\u001b[K\n", + "remote: Compressing objects: 100% (134/134), done.\u001b[K\n", + "remote: Total 795 (delta 148), reused 126 (delta 92), pack-reused 562 (from 1)\u001b[K\n", + "Receiving objects: 100% (795/795), 2.27 MiB | 5.75 MiB/s, done.\n", + "Resolving deltas: 100% (430/430), done.\n" + ] + } + ], + "source": [ + "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "S22kTrfPlyhU", + "outputId": "e3fa4c42-57a5-4fa4-e43e-5b411a3c45fa" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Processing ./structured-qa\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.7.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.27.1)\n", + "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.7.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (2.10.6)\n", + "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.0.17)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (6.0.2)\n", + "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (3.11.0)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (1.41.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev84+g7b9c96c) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2.32.3)\n", + "Requirement already 
satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev84+g7b9c96c) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev84+g7b9c96c) (2.27.2)\n", + "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev84+g7b9c96c) (1.25.2)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.10.2)\n", + "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (6.0.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.1.44)\n", + "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.9.1)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (6.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.23.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in 
/usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.0.2)\n", + "Requirement already satisfied: 
MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (24.3.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.36.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.17.0)\n", + "Building wheels for collected packages: structured-qa\n", + " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev84+g7b9c96c-py3-none-any.whl size=16325 sha256=3a2543903414e4e12121937c7c91c685062c83f3fc53f84a7316c8bec56b4181\n", + " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", + "Successfully built structured-qa\n", + "Installing collected packages: structured-qa\n", + " Attempting uninstall: structured-qa\n", + " Found existing installation: structured-qa 0.3.3.dev84+g7b9c96c\n", + " Uninstalling structured-qa-0.3.3.dev84+g7b9c96c:\n", + " Successfully uninstalled structured-qa-0.3.3.dev84+g7b9c96c\n", + "Successfully installed structured-qa-0.3.3.dev84+g7b9c96c\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "structured_qa" + ] + }, + "id": "df14365dea1e4a95896a43fb5764312a" + } + }, + "metadata": {} + } + ], + "source": [ + "%pip install ./structured-qa" + ] + }, + { + "cell_type": "code", + "source": [ + "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" + ], + "metadata": { + "id": "mZtwFXA5IOvn", + "outputId": "c3b6fd2a-27f5-44e7-b5f6-b05bde51a979", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + 
"execution_count": 1, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "jWlaKC5qXZrh" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process all questions for a single Section" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "\n", + "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You only answer based on the current information available.\n", + "The current information available is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "If the current information available not enough to answer the question,\n", + "you must return the following message and nothing else:\n", + "\n", + "```\n", + "I need more info.\n", + "```\n", + "\n", + "The answer must be in one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "Question: Is the model an LLM?\n", + "Answer: YES\n", + "- Number (for numeric questions)\n", + "Question: How many layers does the model have?\n", + "Answer: 12\n", + "- Single letter (for multiple-choice questions)\n", + "Question: What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", + "Answer: C\n", + "\"\"\"\n", + "\n", + "\n", + "def process_section_questions(\n", + " section_file,\n", + " section_data,\n", + " model,\n", + "):\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in section_data.iterrows():\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " messages = [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": ANSWER_WITH_TYPE_PROMPT.format(\n", + " CURRENT_INFO=section_file.read_text()\n", + " ),\n", + " },\n", + " {\"role\": \"user\", \"content\": question},\n", + " ]\n", + " response = model.get_response(messages)\n", + " logger.info(f\"Answer: {response}\")\n", + " answers[index] = response\n", + " sections[index] = None\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "6RoEbYj3XZri" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_llama_cpp_model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "ObsvwlNslyhZ", + "outputId": "f8e3573d-30d7-4f57-d8eb-d430d42e3755", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 173, + "referenced_widgets": [ + "92e5c930e73d4fce9d0d79de5b12fbc3", + "4fbd860349a44d1faaaa708ee8d81cd0", + "1909a0309d9d4293a10da4abca6a4468", + "47f329f3d9c74758ad6e96ee76036b90", + "a96b9b499a11466da9b2efc8e55689c3", + "e1bfae86937042d8a105f04e3ccdb697", + "8733a460cd564e5f8b9177c4b37bc1d5", + "0e89d5adf51e49d6a7a5e720af8c0f3d", + "b4092c4c9e124f6bb3ec25fbda77044a", + "eaa38024ac24497080f3cff4ddc5b39b", + "a4935eb54fc2442fa243555f0b572ca0" + ] + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: 
UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Qwen2.5-7B-Instruct-Q8_0.gguf: 0%| | 0.00/8.10G [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:00.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:00.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:01.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:01.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:01.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:01.782\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:01.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:02.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: EI30\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:02.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:02.363\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:02.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:02.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:03.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:03.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:03.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:03.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:04.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:04.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:04.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:04.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:04.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:04.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:05.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:05.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:05.185\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:05.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:05.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:05.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:05.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:05.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:06.241\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:06.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:06.245\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:07.570\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:07.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:07.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:07.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:08.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Number\n", + "Question: How many parameters are in the toy model (y = x^2) tree?\n", + "Answer: 14\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:08.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:08.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:08.895\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:09.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: Fewer operations\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:09.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:09.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:09.558\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:09.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:09.563\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:10.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:10.014\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:10.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:10.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:10.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:11.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Five\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:11.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:11.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:11.180\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:11.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + 
"\u001b[32m2025-01-29 13:02:12.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 100\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:12.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:12.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:12.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:12.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:12.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:12.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:12.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:12.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of 
thought?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:13.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:13.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:13.475\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:13.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:13.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:13.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:14.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:14.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:14.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What percentage is the daylight 
factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:14.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:14.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:14.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:15.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:15.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:15.558\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:15.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:18.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\n", + "Based on the information provided, the out-of-domain evaluation for symbolic reasoning tasks includes the coin flip task. The last letter concatenation task is described as a \"toy task\" with in-domain and out-of-domain evaluations, but the coin flip task is not mentioned in the context of a different task. Therefore, the correct answer is A: Coin Flip.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:18.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:18.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:18.855\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:18.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:18.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:18.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:18.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:20.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1024\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:20.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:20.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: I need more info.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:20.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:20.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:20.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:20.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:21.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 4000\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:21.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:21.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:21.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Pdrop = 0.1\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:21.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:21.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:22.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:22.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:22.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:22.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:22.789\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:22.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. 
-B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:23.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: It is cached for later use and to avoid recompilation.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:23.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:23.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:23.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:23.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:24.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:24.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:24.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In which type of parkings 
must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:24.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:24.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:24.507\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.012\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.599\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:25.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:26.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:26.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:26.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.283\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.396\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.777\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:27.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:28.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:28.232\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:28.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:28.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:28.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:28.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:29.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:29.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:29.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:30.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:30.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:30.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:31.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: Datasets ensuring quality and diversity\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:31.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:31.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:32.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:32.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", 
+ "\u001b[32m2025-01-29 13:02:32.217\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 7\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:32.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:32.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:32.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:32.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:32.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:33.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:33.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a 
distant island?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:33.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:33.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:33.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:33.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:33.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:34.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:34.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:34.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:35.024\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:35.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:35.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:35.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:35.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:35.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:35.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:35.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: Player 1\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:35.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:35.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? 
-A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:37.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:37.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:37.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:37.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:37.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:37.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:37.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:37.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:38.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:38.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:38.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:38.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:38.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout 
phase?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:39.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 4\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:39.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:39.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:39.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:39.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:39.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:39.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:39.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:41.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:41.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:41.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:41.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:41.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:42.010\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 
3\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:42.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:42.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:43.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:43.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:43.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:43.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:43.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:43.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - 
\u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:44.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:44.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:44.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:45.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:45.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:45.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:45.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:45.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:46.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:46.535\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:46.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:46.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:46.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? 
-A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:46.882\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:46.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:46.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:47.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:47.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:47.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:47.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:47.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:48.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:48.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:48.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:49.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:49.959\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:50.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:50.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:50.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-01-29 13:02:50.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2015\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for section_name, section_data in data.groupby(\"section\"):\n", + " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", + "\n", + " answers, sections = process_section_questions(section_file, section_data, model)\n", + "\n", + " for index in section_data.index:\n", + " data.loc[index, 
\"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "EYYJgWf6lyha", + "outputId": "dc1b33dd-ee28-4f01-d7a4-9ba023aa8a73" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "14 14 https://arxiv.org/pdf/2210.05189 \n", + "16 16 https://arxiv.org/pdf/2210.05189 \n", + "21 21 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "38 38 https://arxiv.org/pdf/2201.11903 \n", + "42 42 https://arxiv.org/pdf/2201.11903 \n", + "43 43 https://arxiv.org/pdf/2201.11903 \n", + "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", + "51 51 https://github.com/mozilla-ai/structured-qa/re... \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "65 65 https://github.com/mozilla-ai/structured-qa/re... \n", + "72 72 https://github.com/mozilla-ai/structured-qa/re... \n", + "76 76 https://commission.europa.eu/document/download... \n", + "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "82 82 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "85 85 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "\n", + " section \\\n", + "10 5.4 Regularization \n", + "14 3 Experimental Results \n", + "16 3 Experimental Results \n", + "21 Data and data governance \n", + "38 3.1 Experimental Setup \n", + "42 5 Symbolic Reasoning \n", + "43 3.4 Robustness of Chain of Thought \n", + "47 CARD AND TILE EFFECTS \n", + "51 CHAPTER OVERVIEW \n", + "52 CARD AND TILE COSTS \n", + "65 EXPEDITION PHASE \n", + "72 GAME END \n", + "76 1.2.1. 
Internal partitions and doors \n", + "78 5.2. Thread Hierarchy \n", + "82 6.1.1. Compilation Workflow \n", + "85 15.3. API Fundamentals \n", + "\n", + " question answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 \n", + "14 How many parameters are in the toy model (y = ... 14 \n", + "16 What is the main computational advantage of de... B \n", + "21 what is a requirement for datasets used in hig... B \n", + "38 How many large language models were evaluated? 5 \n", + "42 Which symbolic reasoning task is used as an ou... A \n", + "43 How many annotators provided independent chain... 3 \n", + "47 How many different races are there? 6 \n", + "51 After taking a landmark tile, do you reveal a ... NO \n", + "52 Can a player pay coins to compensate for missi... YES \n", + "65 Do you need a fish to conquer a distant island? YES \n", + "72 If player 1 has 30 Victory points and 4 worker... A \n", + "76 What fire resistance must vertical partitions ... A \n", + "78 Can you identify a thread with a four-dimensio... NO \n", + "82 What happens to the compiled binary code after... A \n", + "85 When does a graph allocation's lifetime end? -... B \n", + "\n", + " pred_answer pred_section \n", + "10 PDROP = 0.1 NaN \n", + "14 NUMBER\\nQUESTION: HOW MANY PARAMETERS ARE IN T... NaN \n", + "16 B: FEWER OPERATIONS NaN \n", + "21 B: DATASETS ENSURING QUALITY AND DIVERSITY NaN \n", + "38 FIVE NaN \n", + "42 A\\nBASED ON THE INFORMATION PROVIDED, THE OUT-... NaN \n", + "43 2 NaN \n", + "47 5 NaN \n", + "51 YES NaN \n", + "52 NO NaN \n", + "65 NO NaN \n", + "72 A: PLAYER 1 NaN \n", + "76 A: EI30 NaN \n", + "78 I NEED MORE INFO. NaN \n", + "82 A: IT IS CACHED FOR LATER USE AND TO AVOID REC... NaN \n", + "85 B: WHEN THE EXECUTION REACHES THE FREEING GRAP... NaN " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.037625.4 RegularizationWhat was the dropout rate used for the base mo...0.1PDROP = 0.1NaN
1414https://arxiv.org/pdf/2210.051893 Experimental ResultsHow many parameters are in the toy model (y = ...14NUMBER\\nQUESTION: HOW MANY PARAMETERS ARE IN T...NaN
1616https://arxiv.org/pdf/2210.051893 Experimental ResultsWhat is the main computational advantage of de...BB: FEWER OPERATIONSNaN
2121https://eur-lex.europa.eu/legal-content/EN/TXT...Data and data governancewhat is a requirement for datasets used in hig...BB: DATASETS ENSURING QUALITY AND DIVERSITYNaN
3838https://arxiv.org/pdf/2201.119033.1 Experimental SetupHow many large language models were evaluated?5FIVENaN
4242https://arxiv.org/pdf/2201.119035 Symbolic ReasoningWhich symbolic reasoning task is used as an ou...AA\\nBASED ON THE INFORMATION PROVIDED, THE OUT-...NaN
4343https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32NaN
4747https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?65NaN
5151https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
6565https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEDo you need a fish to conquer a distant island?YESNONaN
7272https://github.com/mozilla-ai/structured-qa/re...GAME ENDIf player 1 has 30 Victory points and 4 worker...AA: PLAYER 1NaN
7676https://commission.europa.eu/document/download...1.2.1. Internal partitions and doorsWhat fire resistance must vertical partitions ...AA: EI30NaN
7878https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...5.2. Thread HierarchyCan you identify a thread with a four-dimensio...NOI NEED MORE INFO.NaN
8282https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...6.1.1. Compilation WorkflowWhat happens to the compiled binary code after...AA: IT IS CACHED FOR LATER USE AND TO AVOID REC...NaN
8585https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...15.3. API FundamentalsWhen does a graph allocation's lifetime end? -...BB: WHEN THE EXECUTION REACHES THE FREEING GRAP...NaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"results\",\n \"rows\": 16,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25,\n \"min\": 10,\n \"max\": 85,\n \"num_unique_values\": 16,\n \"samples\": [\n 10,\n 14,\n 42\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/1706.03762\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"EXPEDITION PHASE\",\n \"1.2.1. Internal partitions and doors\",\n \"5.4 Regularization\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 16,\n \"samples\": [\n \"What was the dropout rate used for the base model?\",\n \"How many parameters are in the toy model (y = x^2) tree?\",\n \"Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"NO\",\n \"14\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"NO\",\n \"A: EI30\",\n \"PDROP = 0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wfz1XQDLlyha", + "outputId": "8808948d-ec60-4e7e-ad8d-996412889ea0" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.8383838383838383" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rjMNQp8-sZn9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "92e5c930e73d4fce9d0d79de5b12fbc3": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4fbd860349a44d1faaaa708ee8d81cd0", + "IPY_MODEL_1909a0309d9d4293a10da4abca6a4468", + "IPY_MODEL_47f329f3d9c74758ad6e96ee76036b90" + ], + "layout": "IPY_MODEL_a96b9b499a11466da9b2efc8e55689c3" + } + }, + "4fbd860349a44d1faaaa708ee8d81cd0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e1bfae86937042d8a105f04e3ccdb697", + "placeholder": "​", + "style": "IPY_MODEL_8733a460cd564e5f8b9177c4b37bc1d5", + "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" + } + }, + "1909a0309d9d4293a10da4abca6a4468": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0e89d5adf51e49d6a7a5e720af8c0f3d", + "max": 8098525888, + "min": 0, + "orientation": "horizontal", + "style": 
"IPY_MODEL_b4092c4c9e124f6bb3ec25fbda77044a", + "value": 8098525888 + } + }, + "47f329f3d9c74758ad6e96ee76036b90": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_eaa38024ac24497080f3cff4ddc5b39b", + "placeholder": "​", + "style": "IPY_MODEL_a4935eb54fc2442fa243555f0b572ca0", + "value": " 8.10G/8.10G [03:12<00:00, 41.4MB/s]" + } + }, + "a96b9b499a11466da9b2efc8e55689c3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + 
}, + "e1bfae86937042d8a105f04e3ccdb697": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8733a460cd564e5f8b9177c4b37bc1d5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0e89d5adf51e49d6a7a5e720af8c0f3d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b4092c4c9e124f6bb3ec25fbda77044a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "eaa38024ac24497080f3cff4ddc5b39b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + 
"flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a4935eb54fc2442fa243555f0b572ca0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 036f8a399a9eb21539ea42a1f8ced5db11e20433 Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 30 Jan 2025 12:56:31 +0100 Subject: [PATCH 087/120] Add qwen RAGatouille --- benchmark/gemini_RAGatouille.ipynb | 6901 +++++++++++++------ benchmark/gemini_full_context.ipynb | 1094 ++- benchmark/qwen_2_5_7B_RAGatouille.ipynb | 4864 +++++++++++++ benchmark/qwen_2_5_7B_perfect_context.ipynb | 471 +- 4 files changed, 10450 insertions(+), 2880 deletions(-) create mode 100644 benchmark/qwen_2_5_7B_RAGatouille.ipynb diff --git a/benchmark/gemini_RAGatouille.ipynb b/benchmark/gemini_RAGatouille.ipynb index c30fcf9..61f1cf4 100644 --- a/benchmark/gemini_RAGatouille.ipynb +++ b/benchmark/gemini_RAGatouille.ipynb @@ -1,2141 +1,4874 @@ { - "cells": [ - { - 
"cell_type": "markdown", - "metadata": { - "id": "Fcx4osZYq3mt" - }, - "source": [ - "# Structured Q&A" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZE32hJKeq3mv" - }, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jDIEL7SNq3mv" - }, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_OwS4mKRq3mv" - }, - "source": [ - "## GPU Check" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_FYZaTmnq3mw" - }, - "source": [ - "First, you'll need to enable GPUs for the notebook:\n", - "\n", - "- Navigate to `Edit`→`Notebook Settings`\n", - "- Select T4 GPU from the Hardware Accelerator section\n", - "- Click `Save` and accept.\n", - "\n", - "Next, we'll confirm that we can connect to the GPU:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "4RsETkxfq3mw", - "outputId": "172850ad-a72e-434e-9686-9060fa95e660" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GPU is available!\n" - ] - } - ], - "source": [ - "import torch\n", - "\n", - "if not torch.cuda.is_available():\n", - " raise RuntimeError(\"GPU not available\")\n", - "else:\n", - " print(\"GPU is available!\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yEgVEmSQq3mx" - }, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "P1eAychVq3my", - "outputId": "b152776c-81c3-487a-d804-09ef5fb75258" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting ragatouille\n", - " Downloading ragatouille-0.0.8.post4-py3-none-any.whl.metadata (15 kB)\n", - "Collecting colbert-ai==0.2.19 (from ragatouille)\n", 
- " Downloading colbert-ai-0.2.19.tar.gz (86 kB)\n", - "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/86.7 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.7/86.7 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting faiss-cpu<2.0.0,>=1.7.4 (from ragatouille)\n", - " Downloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)\n", - "Collecting fast-pytorch-kmeans==0.2.0.1 (from ragatouille)\n", - " Downloading fast_pytorch_kmeans-0.2.0.1-py3-none-any.whl.metadata (1.1 kB)\n", - "Requirement already satisfied: langchain>=0.1.0 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.14)\n", - "Requirement already satisfied: langchain_core>=0.1.4 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.29)\n", - "Collecting llama-index>=0.7 (from ragatouille)\n", - " Downloading llama_index-0.12.13-py3-none-any.whl.metadata (12 kB)\n", - "Collecting onnx<2.0.0,>=1.15.0 (from ragatouille)\n", - " Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)\n", - "Collecting sentence-transformers<3.0.0,>=2.2.2 (from ragatouille)\n", - " Downloading sentence_transformers-2.7.0-py3-none-any.whl.metadata (11 kB)\n", - "Collecting srsly==2.4.8 (from ragatouille)\n", - " Downloading srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n", - "Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (2.5.1+cu121)\n", - "Requirement already satisfied: transformers<5.0.0,>=4.36.2 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (4.47.1)\n", - "Collecting voyager<3.0.0,>=2.0.2 (from ragatouille)\n", - " Downloading 
voyager-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.9 kB)\n", - "Collecting bitarray (from colbert-ai==0.2.19->ragatouille)\n", - " Downloading bitarray-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (32 kB)\n", - "Collecting datasets (from colbert-ai==0.2.19->ragatouille)\n", - " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", - "Requirement already satisfied: flask in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (3.1.0)\n", - "Collecting git-python (from colbert-ai==0.2.19->ragatouille)\n", - " Downloading git_python-1.0.3-py2.py3-none-any.whl.metadata (331 bytes)\n", - "Collecting python-dotenv (from colbert-ai==0.2.19->ragatouille)\n", - " Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", - "Collecting ninja (from colbert-ai==0.2.19->ragatouille)\n", - " Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.3 kB)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (1.13.1)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (4.67.1)\n", - "Collecting ujson (from colbert-ai==0.2.19->ragatouille)\n", - " Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from fast-pytorch-kmeans==0.2.0.1->ragatouille) (1.26.4)\n", - "Collecting pynvml (from fast-pytorch-kmeans==0.2.0.1->ragatouille)\n", - " Downloading pynvml-12.0.0-py3-none-any.whl.metadata (5.4 kB)\n", - "Requirement already satisfied: catalogue<2.1.0,>=2.0.3 in /usr/local/lib/python3.11/dist-packages (from srsly==2.4.8->ragatouille) (2.0.10)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from 
faiss-cpu<2.0.0,>=1.7.4->ragatouille) (24.2)\n", - "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (6.0.2)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.0.37)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (3.11.11)\n", - "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.3.5)\n", - "Requirement already satisfied: langsmith<0.3,>=0.1.17 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.2.10)\n", - "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.10.5)\n", - "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.32.3)\n", - "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (9.0.0)\n", - "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from langchain_core>=0.1.4->ragatouille) (1.33)\n", - "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.11/dist-packages (from langchain_core>=0.1.4->ragatouille) (4.12.2)\n", - "Collecting llama-index-agent-openai<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_agent_openai-0.4.2-py3-none-any.whl.metadata (727 bytes)\n", - "Collecting llama-index-cli<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_cli-0.4.0-py3-none-any.whl.metadata (1.5 kB)\n", - "Collecting llama-index-core<0.13.0,>=0.12.13 (from llama-index>=0.7->ragatouille)\n", - " Downloading 
llama_index_core-0.12.13-py3-none-any.whl.metadata (2.5 kB)\n", - "Collecting llama-index-embeddings-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl.metadata (684 bytes)\n", - "Collecting llama-index-indices-managed-llama-cloud>=0.4.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl.metadata (3.6 kB)\n", - "Collecting llama-index-llms-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_llms_openai-0.3.14-py3-none-any.whl.metadata (3.3 kB)\n", - "Collecting llama-index-multi-modal-llms-openai<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_multi_modal_llms_openai-0.4.2-py3-none-any.whl.metadata (726 bytes)\n", - "Collecting llama-index-program-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_program_openai-0.3.1-py3-none-any.whl.metadata (764 bytes)\n", - "Collecting llama-index-question-gen-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_question_gen_openai-0.3.0-py3-none-any.whl.metadata (783 bytes)\n", - "Collecting llama-index-readers-file<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_readers_file-0.4.4-py3-none-any.whl.metadata (5.4 kB)\n", - "Collecting llama-index-readers-llama-parse>=0.4.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_readers_llama_parse-0.4.0-py3-none-any.whl.metadata (3.6 kB)\n", - "Requirement already satisfied: nltk>3.8.1 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (3.9.1)\n", - "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.11/dist-packages (from onnx<2.0.0,>=1.15.0->ragatouille) (4.25.5)\n", - "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (from 
sentence-transformers<3.0.0,>=2.2.2->ragatouille) (1.6.0)\n", - "Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (0.27.1)\n", - "Requirement already satisfied: Pillow in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (11.1.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.16.1)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.5)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2024.10.0)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (9.1.0.70)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.3.1)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (11.0.2.54)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (10.3.2.106)\n", - 
"Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2.21.5)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", - "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.0)\n", - "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (1.13.1)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.13->ragatouille) (12.6.85)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=1.13->ragatouille) (1.3.0)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (2024.11.6)\n", - "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.21.0)\n", - "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.5.2)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (2.4.4)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from 
aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.3.2)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (24.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.5.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (6.1.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (0.2.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.18.3)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain_core>=0.1.4->ragatouille) (3.0.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (0.28.1)\n", - "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (3.10.14)\n", - "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.0)\n", - "Requirement already satisfied: openai>=1.14.0 in /usr/local/lib/python3.11/dist-packages (from llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.59.6)\n", - "Collecting dataclasses-json (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", - " Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n", - "Requirement already satisfied: 
deprecated>=1.2.9.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille) (1.2.15)\n", - "Collecting dirtyjson<2.0.0,>=1.0.8 (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", - " Downloading dirtyjson-1.0.8-py3-none-any.whl.metadata (11 kB)\n", - "Collecting filetype<2.0.0,>=1.2.0 (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", - " Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)\n", - "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille) (1.6.0)\n", - "Collecting tiktoken>=0.3.3 (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", - " Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", - "Collecting typing-inspect>=0.8.0 (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", - " Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n", - "Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille) (1.17.0)\n", - "Collecting llama-cloud<0.2.0,>=0.1.8 (from llama-index-indices-managed-llama-cloud>=0.4.0->llama-index>=0.7->ragatouille)\n", - " Downloading llama_cloud-0.1.10-py3-none-any.whl.metadata (912 bytes)\n", - "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (4.12.3)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.2.2)\n", - "Collecting pypdf<6.0.0,>=5.1.0 (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille)\n", - " Downloading 
pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)\n", - "Collecting striprtf<0.0.27,>=0.0.26 (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille)\n", - " Downloading striprtf-0.0.26-py3-none-any.whl.metadata (2.1 kB)\n", - "Collecting llama-parse>=0.5.0 (from llama-index-readers-llama-parse>=0.4.0->llama-index>=0.7->ragatouille)\n", - " Downloading llama_parse-0.5.20-py3-none-any.whl.metadata (6.9 kB)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (8.1.8)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (1.4.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (2.27.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2024.12.14)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from SQLAlchemy<3,>=1.4->langchain>=0.1.0->ragatouille) (3.1.1)\n", - "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from 
datasets->colbert-ai==0.2.19->ragatouille) (17.0.0)\n", - "Collecting dill<0.3.9,>=0.3.0 (from datasets->colbert-ai==0.2.19->ragatouille)\n", - " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", - "Collecting xxhash (from datasets->colbert-ai==0.2.19->ragatouille)\n", - " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", - "Collecting multiprocess<0.70.17 (from datasets->colbert-ai==0.2.19->ragatouille)\n", - " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", - "Collecting fsspec (from torch>=1.13->ragatouille)\n", - " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", - "Requirement already satisfied: Werkzeug>=3.1 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (3.1.3)\n", - "Requirement already satisfied: itsdangerous>=2.2 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (2.2.0)\n", - "Requirement already satisfied: blinker>=1.9 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (1.9.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=1.13->ragatouille) (3.0.2)\n", - "Requirement already satisfied: gitpython in /usr/local/lib/python3.11/dist-packages (from git-python->colbert-ai==0.2.19->ragatouille) (3.1.44)\n", - "Collecting nvidia-ml-py<13.0.0a0,>=12.0.0 (from pynvml->fast-pytorch-kmeans==0.2.0.1->ragatouille)\n", - " Downloading nvidia_ml_py-12.560.30-py3-none-any.whl.metadata (8.6 kB)\n", - "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->sentence-transformers<3.0.0,>=2.2.2->ragatouille) (3.5.0)\n", - "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.11/dist-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) 
(2.6)\n", - "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (3.7.1)\n", - "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.7)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain>=0.1.0->ragatouille) (0.14.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.9.0)\n", - "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (0.8.2)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.3.1)\n", - "Collecting mypy-extensions>=0.3.0 (from typing-inspect>=0.8.0->llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", - " Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)\n", - "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->llama-index-core<0.13.0,>=0.12.13->llama-index>=0.7->ragatouille)\n", - " Downloading marshmallow-3.26.0-py3-none-any.whl.metadata (7.3 kB)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython->git-python->colbert-ai==0.2.19->ragatouille) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in 
/usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2024.2)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython->git-python->colbert-ai==0.2.19->ragatouille) (5.0.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.17.0)\n", - "Downloading ragatouille-0.0.8.post4-py3-none-any.whl (41 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading fast_pytorch_kmeans-0.2.0.1-py3-none-any.whl (8.8 kB)\n", - "Downloading srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (490 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m490.9/490.9 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.5/27.5 MB\u001b[0m \u001b[31m78.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading llama_index-0.12.13-py3-none-any.whl (6.9 kB)\n", - "Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.0/16.0 MB\u001b[0m \u001b[31m106.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading 
sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m18.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading voyager-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading llama_index_agent_openai-0.4.2-py3-none-any.whl (13 kB)\n", - "Downloading llama_index_cli-0.4.0-py3-none-any.whl (27 kB)\n", - "Downloading llama_index_core-0.12.13-py3-none-any.whl (1.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m85.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl (6.2 kB)\n", - "Downloading llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl (13 kB)\n", - "Downloading llama_index_llms_openai-0.3.14-py3-none-any.whl (14 kB)\n", - "Downloading llama_index_multi_modal_llms_openai-0.4.2-py3-none-any.whl (5.9 kB)\n", - "Downloading llama_index_program_openai-0.3.1-py3-none-any.whl (5.3 kB)\n", - "Downloading llama_index_question_gen_openai-0.3.0-py3-none-any.whl (2.9 kB)\n", - "Downloading llama_index_readers_file-0.4.4-py3-none-any.whl (39 kB)\n", - "Downloading llama_index_readers_llama_parse-0.4.0-py3-none-any.whl (2.5 kB)\n", - "Downloading bitarray-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (286 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m286.1/286.1 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 
kB\u001b[0m \u001b[31m40.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading git_python-1.0.3-py2.py3-none-any.whl (1.9 kB)\n", - "Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (422 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m422.9/422.9 kB\u001b[0m \u001b[31m40.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pynvml-12.0.0-py3-none-any.whl (26 kB)\n", - "Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", - "Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading dirtyjson-1.0.8-py3-none-any.whl (25 kB)\n", - "Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n", - "Downloading llama_cloud-0.1.10-py3-none-any.whl (247 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m248.0/248.0 kB\u001b[0m \u001b[31m23.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading llama_parse-0.5.20-py3-none-any.whl (16 kB)\n", - "Downloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - 
"\u001b[?25hDownloading nvidia_ml_py-12.560.30-py3-none-any.whl (40 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.5/40.5 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pypdf-5.1.0-py3-none-any.whl (297 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.0/298.0 kB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading striprtf-0.0.26-py3-none-any.whl (6.9 kB)\n", - "Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m69.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", - "Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n", - "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m20.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading marshmallow-3.26.0-py3-none-any.whl (50 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", - "Building wheels for collected packages: colbert-ai\n", - " Building wheel for colbert-ai (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for colbert-ai: filename=colbert_ai-0.2.19-py3-none-any.whl size=114759 sha256=338c5f895f655f35f3dbcc0a7a946dacaa589dd2f089863452f6a6160a178d08\n", - " Stored in directory: /root/.cache/pip/wheels/14/75/5f/9680ae93eb0258ccf3e9d8cd34f328c53f8888c06c37067f3a\n", - "Successfully built colbert-ai\n", - "Installing collected packages: striprtf, nvidia-ml-py, filetype, dirtyjson, bitarray, xxhash, voyager, ujson, srsly, python-dotenv, pypdf, pynvml, onnx, ninja, mypy-extensions, marshmallow, fsspec, faiss-cpu, dill, typing-inspect, tiktoken, multiprocess, llama-cloud, git-python, dataclasses-json, llama-index-core, fast-pytorch-kmeans, datasets, sentence-transformers, llama-parse, llama-index-readers-file, llama-index-llms-openai, llama-index-indices-managed-llama-cloud, llama-index-embeddings-openai, colbert-ai, llama-index-readers-llama-parse, llama-index-multi-modal-llms-openai, llama-index-cli, llama-index-agent-openai, llama-index-program-openai, llama-index-question-gen-openai, llama-index, ragatouille\n", - " Attempting uninstall: srsly\n", - " Found existing installation: srsly 2.5.0\n", - " Uninstalling srsly-2.5.0:\n", - " Successfully uninstalled srsly-2.5.0\n", - " Attempting uninstall: fsspec\n", - " Found existing installation: fsspec 2024.10.0\n", - " Uninstalling fsspec-2024.10.0:\n", - " Successfully uninstalled fsspec-2024.10.0\n", - " Attempting uninstall: sentence-transformers\n", - " Found existing installation: sentence-transformers 3.3.1\n", - " Uninstalling sentence-transformers-3.3.1:\n", - " Successfully uninstalled sentence-transformers-3.3.1\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed bitarray-3.0.0 colbert-ai-0.2.19 dataclasses-json-0.6.7 datasets-3.2.0 dill-0.3.8 dirtyjson-1.0.8 faiss-cpu-1.9.0.post1 fast-pytorch-kmeans-0.2.0.1 filetype-1.2.0 fsspec-2024.9.0 git-python-1.0.3 llama-cloud-0.1.10 llama-index-0.12.13 llama-index-agent-openai-0.4.2 llama-index-cli-0.4.0 llama-index-core-0.12.13 llama-index-embeddings-openai-0.3.1 llama-index-indices-managed-llama-cloud-0.6.4 llama-index-llms-openai-0.3.14 llama-index-multi-modal-llms-openai-0.4.2 llama-index-program-openai-0.3.1 llama-index-question-gen-openai-0.3.0 llama-index-readers-file-0.4.4 llama-index-readers-llama-parse-0.4.0 llama-parse-0.5.20 marshmallow-3.26.0 multiprocess-0.70.16 mypy-extensions-1.0.0 ninja-1.11.1.3 nvidia-ml-py-12.560.30 onnx-1.17.0 pynvml-12.0.0 pypdf-5.1.0 python-dotenv-1.0.1 ragatouille-0.0.8.post4 sentence-transformers-2.7.0 srsly-2.4.8 striprtf-0.0.26 tiktoken-0.8.0 typing-inspect-0.9.0 ujson-5.10.0 voyager-2.1.0 xxhash-3.5.0\n" - ] - } - ], - "source": [ - "%pip install ragatouille PyPDF2" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "I0dl5xGnq3my", - "outputId": "68c881e3-6208-4748-f71b-f5a52b787108" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-49ruike5\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-49ruike5\n", - " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", - " Switched to 
a new branch '5-add-benchmark'\n", - " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 17942ca192e0493c7c061e6f908cc2b945122ef6\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting fire (from structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading fire-0.7.0.tar.gz (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev62+g17942ca) (0.27.1)\n", - "Collecting llama-cpp-python (from structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading llama_cpp_python-0.3.6.tar.gz (66.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 MB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "Collecting loguru (from structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev62+g17942ca) (2.10.5)\n", - "Collecting pymupdf4llm (from structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev62+g17942ca) (6.0.2)\n", - "Collecting streamlit (from structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", - "Collecting unsloth (from structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading unsloth-2025.1.6-py3-none-any.whl.metadata (53 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.7/53.7 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev62+g17942ca) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (3.16.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (2024.9.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (4.12.2)\n", - "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev62+g17942ca) (1.26.4)\n", - "Collecting diskcache>=5.6.1 (from llama-cpp-python->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n", - "Requirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev62+g17942ca) (3.1.5)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev62+g17942ca) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev62+g17942ca) (2.27.2)\n", - "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (5.5.0)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (8.1.8)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (4.25.5)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (0.10.2)\n", - "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (3.1.44)\n", - "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev62+g17942ca) (6.3.3)\n", - "Collecting unsloth_zoo>=2025.1.4 (from 
unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading unsloth_zoo-2025.1.5-py3-none-any.whl.metadata (16 kB)\n", - "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (2.5.1+cu121)\n", - "Collecting xformers>=0.0.27.post2 (from unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n", - "Collecting bitsandbytes (from unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)\n", - "Requirement already satisfied: triton>=3.0.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (3.1.0)\n", - "Collecting tyro (from unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading tyro-0.9.13-py3-none-any.whl.metadata (9.4 kB)\n", - "Requirement already satisfied: transformers!=4.47.0,>=4.46.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (4.47.1)\n", - "Requirement already satisfied: datasets>=2.16.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (3.2.0)\n", - "Requirement already satisfied: sentencepiece>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.2.0)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (5.9.5)\n", - "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.45.1)\n", - "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.2.1)\n", - "Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from 
unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading trl-0.13.0-py3-none-any.whl.metadata (11 kB)\n", - "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.14.0)\n", - "Collecting protobuf<6,>=3.20 (from streamlit->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)\n", - "Collecting hf_transfer (from unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n", - "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from accelerate>=0.34.1->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.5.2)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (1.21.1)\n", - "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.3.8)\n", - "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (3.5.0)\n", - "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.70.16)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (3.11.11)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from 
gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev62+g17942ca) (4.0.12)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2>=2.11.3->llama-cpp-python->structured-qa==0.3.3.dev62+g17942ca) (3.0.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (2024.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev62+g17942ca) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (2.18.0)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from 
torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (3.4.2)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.105)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (9.1.0.70)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.3.1)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (11.0.2.54)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (2.21.5)\n", - "Requirement already satisfied: 
nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.1.105)\n", - "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.13.1)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (12.6.85)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.3.0)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev62+g17942ca) (2024.11.6)\n", - "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.21.0)\n", - "Collecting cut_cross_entropy (from unsloth_zoo>=2025.1.4->unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)\n", - "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.16)\n", - "Collecting shtab>=1.5.6 (from tyro->unsloth->structured-qa==0.3.3.dev62+g17942ca)\n", - " Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n", - "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev62+g17942ca) (4.4.1)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (2.4.4)\n", - "Requirement 
already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.3.2)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (24.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.5.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (6.1.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (0.2.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev62+g17942ca) (1.18.3)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev62+g17942ca) (5.0.2)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (0.35.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages 
(from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev62+g17942ca) (1.17.0)\n", - "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", - "Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m117.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading unsloth-2025.1.6-py3-none-any.whl (175 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m175.5/175.5 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.1/162.1 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m114.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m99.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading trl-0.13.0-py3-none-any.whl (293 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.4/293.4 kB\u001b[0m \u001b[31m23.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading unsloth_zoo-2025.1.5-py3-none-any.whl (80 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.2/80.2 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl (15.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.3/15.3 MB\u001b[0m \u001b[31m101.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.7/69.7 MB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m108.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tyro-0.9.13-py3-none-any.whl (115 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.7/115.7 kB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\n", - "Downloading 
cut_cross_entropy-25.1.1-py3-none-any.whl (22 kB)\n", - "Building wheels for collected packages: structured-qa, fire, llama-cpp-python\n", - " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev62+g17942ca-py3-none-any.whl size=16254 sha256=4a483dde13b83e4423b427dc48638180c87ff1dca8cb35d4a09006ef2ca537d7\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-za51p1on/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", - " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=d9cddb798cd09136c441440f7a103ae7e5879184815ed603d5d49bd8a9e39570\n", - " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", - " Building wheel for llama-cpp-python (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.6-cp311-cp311-linux_x86_64.whl size=4070578 sha256=6aa44ca69ab5b970dc8498c4f69366acbfa247fd7a03849742858bf7ca77d063\n", - " Stored in directory: /root/.cache/pip/wheels/e8/96/d2/acfb576f7a58ef0580e2fec8096e5eefd17cc356017089337b\n", - "Successfully built structured-qa fire llama-cpp-python\n", - "Installing collected packages: watchdog, shtab, pymupdf, protobuf, loguru, hf_transfer, fire, diskcache, pymupdf4llm, pydeck, llama-cpp-python, tyro, xformers, cut_cross_entropy, bitsandbytes, trl, streamlit, unsloth_zoo, unsloth, structured-qa\n", - " Attempting uninstall: protobuf\n", - " Found existing installation: protobuf 4.25.5\n", - " Uninstalling protobuf-4.25.5:\n", - " Successfully uninstalled protobuf-4.25.5\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "grpcio-status 1.62.3 requires protobuf>=4.21.6, but you have protobuf 3.20.3 which is incompatible.\n", - "tensorflow-metadata 1.16.1 requires protobuf<6.0.0dev,>=4.25.2; python_version >= \"3.11\", but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed bitsandbytes-0.45.1 cut_cross_entropy-25.1.1 diskcache-5.6.3 fire-0.7.0 hf_transfer-0.1.9 llama-cpp-python-0.3.6 loguru-0.7.3 protobuf-3.20.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 shtab-1.7.1 streamlit-1.41.1 structured-qa-0.3.3.dev62+g17942ca trl-0.13.0 tyro-0.9.13 unsloth-2025.1.6 unsloth_zoo-2025.1.5 watchdog-6.0.0 xformers-0.0.29.post1\n" - ] - }, - { - "data": { - "application/vnd.colab-display-data+json": { - "id": "a56f4501de384e0e8c5cf504d1337657", - "pip_warning": { - "packages": [ - "google" - ] - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Nl_haxghq3mz", - "outputId": "18bc7cbf-feaa-481d-9d84-6c8390ff258d" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2025-01-24 10:32:07-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.109.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 14711 (14K) [text/plain]\n", - "Saving to: ‘structured_qa.csv’\n", - "\n", - "\rstructured_qa.csv 0%[ ] 0 --.-KB/s \rstructured_qa.csv 100%[===================>] 14.37K --.-KB/s in 0s \n", - "\n", - "2025-01-24 10:32:07 (73.1 MB/s) - ‘structured_qa.csv’ saved [14711/14711]\n", - "\n" - ] - } - ], - "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZdWx_e7iq3mz" - }, - "source": [ - "# Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "vGqX_bU5q3mz" - }, - "outputs": [], - "source": [ - "import os\n", - "import google.generativeai as genai\n", - "from google.colab.userdata import get, SecretNotFoundError\n", - "\n", - "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", - "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "cbkIjBYNq3mz" - }, - "outputs": [], - "source": [ - "from loguru import logger" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "BiUeBWnIq3mz" - }, - "outputs": [], - "source": [ - "import PyPDF2\n", - "\n", - "\n", - "def load_pdf(pdf_file: str) -> str | None:\n", - " try:\n", - " pdf_reader = PyPDF2.PdfReader(pdf_file)\n", - " return \"\\n\".join(page.extract_text() for page in pdf_reader.pages)\n", - " except Exception as e:\n", - " logger.exception(e)\n", - " return None" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Function to Process all questions for a single Document" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": { - "id": "Ilxn8LGFq3m0" - }, - "outputs": [], - "source": [ - "import json\n", - "import time\n", - 
"\n", - "from ragatouille import RAGPretrainedModel\n", - "from ragatouille.data import CorpusProcessor\n", - "\n", - "\n", - "def process_document(\n", - " document_file,\n", - " document_data,\n", - " model,\n", - "):\n", - " logger.info(\"Setting up RAG\")\n", - " RAG = RAGPretrainedModel.from_pretrained(\"colbert-ir/colbertv2.0\")\n", - " corpus_processor = CorpusProcessor()\n", - " documents = corpus_processor.process_corpus([load_pdf(document_file)])\n", - " RAG.encode([x[\"content\"] for x in documents])\n", - "\n", - " logger.info(\"Predicting\")\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in document_data.iterrows():\n", - " if model.n > 0 and model.n % 9 == 0:\n", - " logger.info(\"Waiting for 60 seconds\")\n", - " time.sleep(60)\n", - " question = row[\"question\"]\n", - "\n", - " logger.info(f\"Question: {question}\")\n", - " results = RAG.search_encoded_docs(query=question, k=3)\n", - " current_info = \"\\n\".join(result[\"content\"] for result in results)\n", - " logger.info(current_info[:100])\n", - "\n", - " answer = model.model.generate_content(\n", - " [f\"This is the document: {current_info}\", question]\n", - " )\n", - " logger.info(answer.text)\n", - " answers[index] = json.loads(answer.text)[\"answer\"]\n", - " sections[index] = None\n", - " model.n += 1\n", - "\n", - " return answers, sections" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jr3ke2aJq3m0" - }, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": { - "id": "zKMHc0Ouq3m0" - }, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_gemini_model" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "id": "cMBl2dxLq3m0" - }, - "outputs": [], - "source": [ - "SYSTEM_PROMPT = \"\"\"\n", - "You are given an input document and a question.\n", - "You can only answer the question based on the information in the document.\n", - "You will return a 
JSON name with a single key: \"answer\".\n", - "In `\"answer\"`, you will return the answer using one of the following JSON types:\n", - "- Yes/No (for boolean questions)\n", - "Is the model an LLM?\n", - "{\n", - " \"answer\": \"No\"\n", - "}\n", - "- Single number (for numeric questions)\n", - "How many layers does the model have?\n", - "{\n", - " \"answer\": 12\n", - "}\n", - "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model?\n", - "-A: ReLU\n", - "-B: Sigmoid\n", - "-C: Tanh\n", - "{\n", - " \"answer\": \"C\"\n", - "}\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "id": "QV3pBXvhq3m0" - }, - "outputs": [], - "source": [ - "model = load_gemini_model(\n", - " \"gemini-2.0-flash-exp\",\n", - " system_prompt=SYSTEM_PROMPT,\n", - " generation_config={\n", - " \"response_mime_type\": \"application/json\",\n", - " },\n", - ")\n", - "model.n = 0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j5jWlVBaq3m1" - }, - "source": [ - "# Run Benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "W9r17Rz3q3m1", - "outputId": "3232af63-09f7-4377-dff2-e8df725c1445" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-01-24 11:10:27.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m8\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-24 11:10:27.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-01-24 11:10:27.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 1706.03762.pdf already exists\u001b[0m\n", 
- "\u001b[32m2025-01-24 11:10:27.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 56 documents...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685v2.pdf\u001b[0m\n", - "\u001b[32m2025-01-24 11:11:45.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 2106.09685v2.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-24 11:11:45.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 137 documents...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-01-24 11:12:00.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 2201.11903.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-24 11:12:00.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 199 documents...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/7 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-01-24 11:13:21.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 2210.05189.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-24 11:13:21.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 44 documents...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-01-24 11:14:30.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-24 11:14:30.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "jDIEL7SNq3mv" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 143 documents...\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "_OwS4mKRq3mv" + }, + "source": [ + "## GPU Check" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-01-24 11:14:42.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-24 11:14:42.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "yEgVEmSQq3mx" + }, + "source": [ + "## Installing dependencies" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 364 documents...\n" - ] + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "P1eAychVq3my", + "outputId": "ab1ecc14-7c37-46e6-f3ba-a4be47c4dc31" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting ragatouille\n", + " Downloading ragatouille-0.0.8.post4-py3-none-any.whl.metadata (15 kB)\n", + "Collecting PyPDF2\n", + " Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)\n", + "Collecting colbert-ai==0.2.19 (from ragatouille)\n", + " Downloading colbert-ai-0.2.19.tar.gz (86 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.7/86.7 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting faiss-cpu<2.0.0,>=1.7.4 (from ragatouille)\n", + " Downloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)\n", + "Collecting fast-pytorch-kmeans==0.2.0.1 (from ragatouille)\n", + " Downloading fast_pytorch_kmeans-0.2.0.1-py3-none-any.whl.metadata (1.1 kB)\n", + "Requirement already satisfied: langchain>=0.1.0 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.15)\n", + "Requirement already satisfied: langchain_core>=0.1.4 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.31)\n", + "Collecting llama-index>=0.7 (from ragatouille)\n", + " Downloading llama_index-0.12.14-py3-none-any.whl.metadata (12 kB)\n", + "Collecting onnx<2.0.0,>=1.15.0 (from ragatouille)\n", + " Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)\n", + "Collecting sentence-transformers<3.0.0,>=2.2.2 (from ragatouille)\n", + " Downloading sentence_transformers-2.7.0-py3-none-any.whl.metadata (11 kB)\n", + "Collecting srsly==2.4.8 (from ragatouille)\n", + " Downloading srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n", + "Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (2.5.1+cu121)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.36.2 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (4.47.1)\n", + "Collecting voyager<3.0.0,>=2.0.2 (from ragatouille)\n", + " Downloading voyager-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.9 kB)\n", + "Collecting bitarray (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading bitarray-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (32 kB)\n", + "Collecting datasets (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already 
satisfied: flask in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (3.1.0)\n", + "Collecting git-python (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading git_python-1.0.3-py2.py3-none-any.whl.metadata (331 bytes)\n", + "Collecting python-dotenv (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", + "Collecting ninja (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.3 kB)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (1.13.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (4.67.1)\n", + "Collecting ujson (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from fast-pytorch-kmeans==0.2.0.1->ragatouille) (1.26.4)\n", + "Collecting pynvml (from fast-pytorch-kmeans==0.2.0.1->ragatouille)\n", + " Downloading pynvml-12.0.0-py3-none-any.whl.metadata (5.4 kB)\n", + "Requirement already satisfied: catalogue<2.1.0,>=2.0.3 in /usr/local/lib/python3.11/dist-packages (from srsly==2.4.8->ragatouille) (2.0.10)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from faiss-cpu<2.0.0,>=1.7.4->ragatouille) (24.2)\n", + "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (6.0.2)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.0.37)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.11/dist-packages (from 
langchain>=0.1.0->ragatouille) (3.11.11)\n", + "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.3.5)\n", + "Requirement already satisfied: langsmith<0.4,>=0.1.17 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.3.1)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.10.6)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.32.3)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (9.0.0)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from langchain_core>=0.1.4->ragatouille) (1.33)\n", + "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.11/dist-packages (from langchain_core>=0.1.4->ragatouille) (4.12.2)\n", + "Collecting llama-index-agent-openai<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_agent_openai-0.4.3-py3-none-any.whl.metadata (727 bytes)\n", + "Collecting llama-index-cli<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_cli-0.4.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Collecting llama-index-core<0.13.0,>=0.12.14 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_core-0.12.14-py3-none-any.whl.metadata (2.5 kB)\n", + "Collecting llama-index-embeddings-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl.metadata (684 bytes)\n", + "Collecting llama-index-indices-managed-llama-cloud>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading 
llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl.metadata (3.6 kB)\n", + "Collecting llama-index-llms-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_llms_openai-0.3.14-py3-none-any.whl.metadata (3.3 kB)\n", + "Collecting llama-index-multi-modal-llms-openai<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_multi_modal_llms_openai-0.4.2-py3-none-any.whl.metadata (726 bytes)\n", + "Collecting llama-index-program-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_program_openai-0.3.1-py3-none-any.whl.metadata (764 bytes)\n", + "Collecting llama-index-question-gen-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_question_gen_openai-0.3.0-py3-none-any.whl.metadata (783 bytes)\n", + "Collecting llama-index-readers-file<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_readers_file-0.4.4-py3-none-any.whl.metadata (5.4 kB)\n", + "Collecting llama-index-readers-llama-parse>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_readers_llama_parse-0.4.0-py3-none-any.whl.metadata (3.6 kB)\n", + "Requirement already satisfied: nltk>3.8.1 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (3.9.1)\n", + "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.11/dist-packages (from onnx<2.0.0,>=1.15.0->ragatouille) (4.25.6)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (1.6.1)\n", + "Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (0.27.1)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (11.1.0)\n", + "Requirement 
already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.17.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.5)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2024.10.0)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in 
/usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", + "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (1.13.1)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.13->ragatouille) (12.8.61)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=1.13->ragatouille) (1.3.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (2024.11.6)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.21.0)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.5.2)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.5.0)\n", + 
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.18.3)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain_core>=0.1.4->ragatouille) (3.0.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.28.1)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (3.10.15)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.0)\n", + "Requirement already satisfied: zstandard<0.24.0,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.23.0)\n", + "Requirement already satisfied: openai>=1.14.0 in /usr/local/lib/python3.11/dist-packages (from llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.59.9)\n", + "Collecting dataclasses-json (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n", + "Requirement already satisfied: deprecated>=1.2.9.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille) (1.2.17)\n", + "Collecting dirtyjson<2.0.0,>=1.0.8 (from 
llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading dirtyjson-1.0.8-py3-none-any.whl.metadata (11 kB)\n", + "Collecting filetype<2.0.0,>=1.2.0 (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)\n", + "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille) (1.6.0)\n", + "Collecting tiktoken>=0.3.3 (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", + "Collecting typing-inspect>=0.8.0 (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille) (1.17.2)\n", + "Collecting llama-cloud<0.2.0,>=0.1.8 (from llama-index-indices-managed-llama-cloud>=0.4.0->llama-index>=0.7->ragatouille)\n", + " Downloading llama_cloud-0.1.11-py3-none-any.whl.metadata (912 bytes)\n", + "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (4.12.3)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.2.2)\n", + "Collecting pypdf<6.0.0,>=5.1.0 (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille)\n", + " Downloading pypdf-5.2.0-py3-none-any.whl.metadata (7.2 kB)\n", + "Collecting striprtf<0.0.27,>=0.0.26 (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille)\n", + " Downloading 
striprtf-0.0.26-py3-none-any.whl.metadata (2.1 kB)\n", + "Collecting llama-parse>=0.5.0 (from llama-index-readers-llama-parse>=0.4.0->llama-index>=0.7->ragatouille)\n", + " Downloading llama_parse-0.5.20-py3-none-any.whl.metadata (6.9 kB)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (8.1.8)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (1.4.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (2.27.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2024.12.14)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from SQLAlchemy<3,>=1.4->langchain>=0.1.0->ragatouille) (3.1.1)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets->colbert-ai==0.2.19->ragatouille) (17.0.0)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets->colbert-ai==0.2.19->ragatouille)\n", + " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", + 
"Collecting xxhash (from datasets->colbert-ai==0.2.19->ragatouille)\n", + " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", + "Collecting multiprocess<0.70.17 (from datasets->colbert-ai==0.2.19->ragatouille)\n", + " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", + "Collecting fsspec (from torch>=1.13->ragatouille)\n", + " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: Werkzeug>=3.1 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (3.1.3)\n", + "Requirement already satisfied: itsdangerous>=2.2 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (2.2.0)\n", + "Requirement already satisfied: blinker>=1.9 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (1.9.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=1.13->ragatouille) (3.0.2)\n", + "Requirement already satisfied: gitpython in /usr/local/lib/python3.11/dist-packages (from git-python->colbert-ai==0.2.19->ragatouille) (3.1.44)\n", + "Collecting nvidia-ml-py<13.0.0a0,>=12.0.0 (from pynvml->fast-pytorch-kmeans==0.2.0.1->ragatouille)\n", + " Downloading nvidia_ml_py-12.570.86-py3-none-any.whl.metadata (8.7 kB)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->sentence-transformers<3.0.0,>=2.2.2->ragatouille) (3.5.0)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.11/dist-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.6)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (3.7.1)\n", + "Requirement already 
satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.14.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.9.0)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (0.8.2)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.3.1)\n", + "Collecting mypy-extensions>=0.3.0 (from typing-inspect>=0.8.0->llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)\n", + "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading marshmallow-3.26.0-py3-none-any.whl.metadata (7.3 kB)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython->git-python->colbert-ai==0.2.19->ragatouille) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in 
/usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2025.1)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython->git-python->colbert-ai==0.2.19->ragatouille) (5.0.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.17.0)\n", + "Downloading ragatouille-0.0.8.post4-py3-none-any.whl (41 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fast_pytorch_kmeans-0.2.0.1-py3-none-any.whl (8.8 kB)\n", + "Downloading srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (490 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m490.9/490.9 kB\u001b[0m \u001b[31m20.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pypdf2-3.0.1-py3-none-any.whl (232 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m232.6/232.6 kB\u001b[0m \u001b[31m24.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.5/27.5 MB\u001b[0m \u001b[31m77.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_index-0.12.14-py3-none-any.whl (6.9 kB)\n", + "Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.0/16.0 MB\u001b[0m \u001b[31m105.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hDownloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading voyager-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m100.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_index_agent_openai-0.4.3-py3-none-any.whl (13 kB)\n", + "Downloading llama_index_cli-0.4.0-py3-none-any.whl (27 kB)\n", + "Downloading llama_index_core-0.12.14-py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m81.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl (6.2 kB)\n", + "Downloading llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl (13 kB)\n", + "Downloading llama_index_llms_openai-0.3.14-py3-none-any.whl (14 kB)\n", + "Downloading llama_index_multi_modal_llms_openai-0.4.2-py3-none-any.whl (5.9 kB)\n", + "Downloading llama_index_program_openai-0.3.1-py3-none-any.whl (5.3 kB)\n", + "Downloading llama_index_question_gen_openai-0.3.0-py3-none-any.whl (2.9 kB)\n", + "Downloading llama_index_readers_file-0.4.4-py3-none-any.whl (39 kB)\n", + "Downloading llama_index_readers_llama_parse-0.4.0-py3-none-any.whl (2.5 kB)\n", + "Downloading bitarray-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (286 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m286.1/286.1 kB\u001b[0m \u001b[31m27.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m41.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m20.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading git_python-1.0.3-py2.py3-none-any.whl (1.9 kB)\n", + "Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (422 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m422.9/422.9 kB\u001b[0m \u001b[31m37.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pynvml-12.0.0-py3-none-any.whl (26 kB)\n", + "Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", + "Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dirtyjson-1.0.8-py3-none-any.whl (25 kB)\n", + "Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n", + "Downloading llama_cloud-0.1.11-py3-none-any.whl (250 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m250.6/250.6 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_parse-0.5.20-py3-none-any.whl (16 kB)\n", + "Downloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 
kB\u001b[0m \u001b[31m14.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_ml_py-12.570.86-py3-none-any.whl (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.4/44.4 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pypdf-5.2.0-py3-none-any.whl (298 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.7/298.7 kB\u001b[0m \u001b[31m29.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading striprtf-0.0.26-py3-none-any.whl (6.9 kB)\n", + "Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m68.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n", + "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading marshmallow-3.26.0-py3-none-any.whl (50 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Building wheels for collected packages: colbert-ai\n", + " Building wheel for colbert-ai (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for colbert-ai: filename=colbert_ai-0.2.19-py3-none-any.whl size=114759 sha256=1259c9368306c751f97b0a8a0e9b0b834b62d6afab66dc017b037e175bb4e949\n", + " Stored in directory: /root/.cache/pip/wheels/14/75/5f/9680ae93eb0258ccf3e9d8cd34f328c53f8888c06c37067f3a\n", + "Successfully built colbert-ai\n", + "Installing collected packages: striprtf, nvidia-ml-py, filetype, dirtyjson, bitarray, xxhash, voyager, ujson, srsly, python-dotenv, PyPDF2, pypdf, pynvml, onnx, ninja, mypy-extensions, marshmallow, fsspec, faiss-cpu, dill, typing-inspect, tiktoken, multiprocess, llama-cloud, git-python, dataclasses-json, llama-index-core, fast-pytorch-kmeans, datasets, sentence-transformers, llama-parse, llama-index-readers-file, llama-index-llms-openai, llama-index-indices-managed-llama-cloud, llama-index-embeddings-openai, colbert-ai, llama-index-readers-llama-parse, llama-index-multi-modal-llms-openai, llama-index-cli, llama-index-agent-openai, llama-index-program-openai, llama-index-question-gen-openai, llama-index, ragatouille\n", + " Attempting uninstall: srsly\n", + " Found existing installation: srsly 2.5.1\n", + " Uninstalling srsly-2.5.1:\n", + " Successfully uninstalled srsly-2.5.1\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2024.10.0\n", + " Uninstalling fsspec-2024.10.0:\n", + " Successfully uninstalled fsspec-2024.10.0\n", + " Attempting uninstall: sentence-transformers\n", + " Found existing installation: sentence-transformers 3.3.1\n", + " Uninstalling sentence-transformers-3.3.1:\n", + " Successfully uninstalled sentence-transformers-3.3.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed PyPDF2-3.0.1 bitarray-3.0.0 colbert-ai-0.2.19 dataclasses-json-0.6.7 datasets-3.2.0 dill-0.3.8 dirtyjson-1.0.8 faiss-cpu-1.9.0.post1 fast-pytorch-kmeans-0.2.0.1 filetype-1.2.0 fsspec-2024.9.0 git-python-1.0.3 llama-cloud-0.1.11 llama-index-0.12.14 llama-index-agent-openai-0.4.3 llama-index-cli-0.4.0 llama-index-core-0.12.14 llama-index-embeddings-openai-0.3.1 llama-index-indices-managed-llama-cloud-0.6.4 llama-index-llms-openai-0.3.14 llama-index-multi-modal-llms-openai-0.4.2 llama-index-program-openai-0.3.1 llama-index-question-gen-openai-0.3.0 llama-index-readers-file-0.4.4 llama-index-readers-llama-parse-0.4.0 llama-parse-0.5.20 marshmallow-3.26.0 multiprocess-0.70.16 mypy-extensions-1.0.0 ninja-1.11.1.3 nvidia-ml-py-12.570.86 onnx-1.17.0 pynvml-12.0.0 pypdf-5.2.0 python-dotenv-1.0.1 ragatouille-0.0.8.post4 sentence-transformers-2.7.0 srsly-2.4.8 striprtf-0.0.26 tiktoken-0.8.0 typing-inspect-0.9.0 ujson-5.10.0 voyager-2.1.0 xxhash-3.5.0\n" + ] + } + ], + "source": [ + "%pip install ragatouille PyPDF2" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/12 [00:00structured-qa==0.3.3.dev77+g0b8e5cf) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (2024.9.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev77+g0b8e5cf) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev77+g0b8e5cf) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev77+g0b8e5cf)\n", + " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf)\n", + " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf)\n", + " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (6.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from 
altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (1.23.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from 
rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (24.3.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (0.36.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (1.17.0)\n", + "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", + "Downloading 
streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m93.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m102.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m41.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m508.7 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", + " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev77+g0b8e5cf-py3-none-any.whl size=16202 sha256=471f9739e08b922697b7117495ad211785a46ff89fe92d96a32d34b93fba365b\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-xj29gks5/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", + " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=aaae19261f87cd2a4893f2b83751a95da7e0b5fae55a2d71054ad33c10ceac02\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + "Successfully built structured-qa fire\n", + "Installing collected packages: watchdog, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", + "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 streamlit-1.41.1 structured-qa-0.3.3.dev77+g0b8e5cf watchdog-6.0.0\n" + ] + } + ], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shapes:\n", - "encodings: torch.Size([364, 508, 128])\n", - "doc_masks: torch.Size([364, 508])\n", - "Documents encoded!\n" - ] + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Nl_haxghq3mz", + "outputId": "9b2a4855-a3c3-4395-bc32-9a41a9030f36" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2025-01-29 09:01:27-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 21734 (21K) [text/plain]\n", + "Saving to: ‘structured_qa.csv’\n", + "\n", + "structured_qa.csv 100%[===================>] 21.22K --.-KB/s in 0.002s \n", + "\n", + "2025-01-29 09:01:27 (13.3 MB/s) - ‘structured_qa.csv’ saved [21734/21734]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-01-24 11:15:48.921\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:48.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mThis range may be amended to between 2 and 15 French degrees in the event that a specific \n", - "demand e\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:50.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:50.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:50.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mI.2.8. 
GAS DETECTION AND VE NTING \n", - "The requirements outlined below must be met in addition to th\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:51.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:51.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the daylight factor required for façades with exterior obstructions?\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:51.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m4. VISUAL COMFORT \n", - "4.1. Natural lighting \n", - "Natural light is required for all permanent wor k \u001b[0m\n", - "\u001b[32m2025-01-24 11:15:53.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"0.7\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:53.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:53.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m- There must be no external windows, unless this is unavoidable for technical reasons. 
\n", - "Access cont\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:54.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:54.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:54.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile ?uri=OJ:L_202401689.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-24 11:15:54.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "ZdWx_e7iq3mz" + }, + "source": [ + "# Setup" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 754 documents...\n" - ] + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "vGqX_bU5q3mz" + }, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "from google.colab.userdata import get, SecretNotFoundError\n", + "\n", + "try:\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + "except SecretNotFoundError as e:\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/24 [00:00 str | None:\n", + " try:\n", + " pdf_reader = PyPDF2.PdfReader(pdf_file)\n", + " return \"\\n\".join(page.extract_text() for page in pdf_reader.pages)\n", + " except Exception as e:\n", + " logger.exception(e)\n", + " return None" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-01-24 11:16:03.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:03.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? 
-A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:03.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mIn particular , data \n", - "sets should take into account, to the extent required by their intended purpos\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:04.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:04.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:04.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mThe full range of capabilities in a model could be better \n", - "understo od after its placing on the mark\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:06.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Not applicable\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:06.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. 
-C: That there is no way to detect that the content is synthetic.\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:06.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mThis obligation shall not apply to AI systems author ised by law to \n", - "detect , prevent, invest igate \u001b[0m\n", - "\u001b[32m2025-01-24 11:16:07.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:07.630\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:07.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m7. Upon receiving a notif ication related to a serious incident refer red to in Article 3, point (49\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:09.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:16:09.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:09.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. 
-C: 12 months\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:09.564\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m(e)data collected and processed for the purpose of the testing in real world conditions shall be tra\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:11.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:11.033\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:11.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mOJ L, 12.7.2024 EN\n", - "ELI: http://data.europa.eu/eli/reg/2024/1689/oj 115/144\n", - "5. The supply of incor re\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:12.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:12.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? 
-A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:12.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mThe AI Office shall assist in the assessment of available standards.\n", - "9. Codes of practice shall be r\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:14.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:14.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? -A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:14.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m7. The marke t surveillance author ities other than the mark et surveillance author ity of the Membe\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:16.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:16.584\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? 
-A: 5 years -B: 10 years C: 15 years\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:16.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m4. Impor ters shall ensure that, while a high-r isk AI syste m is under their responsibility , stora\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:20.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:20.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? -A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:20.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mCHAPTER VII\n", - "GOVERNANCE\n", - "SECTION 1\n", - "Gove rnance at Union level\n", - "Article 64\n", - "AI Office\n", - "1. 
The Commission s\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:22.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:22.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:22.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile 7DUME_EN01_Rules.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-24 11:17:22.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "z0B2yhFISDgG" + }, + "source": [ + "## Function to Process all questions for a single Document" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 17 documents...\n" - ] + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "Ilxn8LGFq3m0" + }, + "outputs": [], + "source": [ + "import json\n", + "import time\n", + "\n", + "from ragatouille import RAGPretrainedModel\n", + "from ragatouille.data import CorpusProcessor\n", + "\n", + "\n", + "def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + "):\n", + " logger.info(\"Setting up RAG\")\n", + " RAG = RAGPretrainedModel.from_pretrained(\"colbert-ir/colbertv2.0\")\n", + " corpus_processor = CorpusProcessor()\n", + " documents = corpus_processor.process_corpus([load_pdf(document_file)])\n", + " RAG.encode([x[\"content\"] for x in documents])\n", + "\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " if model.n > 0 and model.n % 9 == 0:\n", + " logger.info(\"Waiting for 60 seconds\")\n", + " time.sleep(60)\n", + " question = row[\"question\"]\n", + " question_part, *options = question.split(\"?\")\n", + "\n", + " logger.info(f\"Question: {question}\")\n", + " results = RAG.search_encoded_docs(query=question_part, k=3)\n", + " current_info = \"\\n\".join(result[\"content\"] for result in results)\n", + " logger.info(current_info[:100])\n", + "\n", + " answer = model.model.generate_content(\n", + " [f\"This is the document: {current_info}\", question]\n", + " )\n", + " logger.info(answer.text)\n", + " answers[index] = json.loads(answer.text)[\"answer\"]\n", + " sections[index] = None\n", + " model.n += 1\n", + "\n", + " return answers, sections" + ] }, { - "name": "stderr", - "output_type": 
"stream", - "text": [ - " 0%| | 0/1 [00:00\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:48.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile is_eotn_rulebook.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:48.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "cMBl2dxLq3m0" + }, + "outputs": [], + "source": [ + "SYSTEM_PROMPT = \"\"\"\n", + "You are given an input document and a question.\n", + "You can only answer the question based on the information in the document.\n", + "You will return a JSON name with a single key: \"answer\".\n", + "In `\"answer\"`, you will return the answer using one of the following JSON types:\n", + "- Yes/No (for boolean questions)\n", + "Is the model an LLM?\n", + "{\n", + " \"answer\": \"No\"\n", + "}\n", + "- Single number (for numeric questions)\n", + "How many layers does the model have?\n", + "{\n", + " \"answer\": 12\n", + "}\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model?\n", + "-A: ReLU\n", + "-B: Sigmoid\n", + "-C: Tanh\n", + "{\n", + " \"answer\": \"C\"\n", + "}\n", + "\"\"\"" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 48 documents...\n" - ] + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "QV3pBXvhq3m0" + }, + "outputs": [], + 
"source": [ + "model = load_gemini_model(\n", + " \"gemini-2.0-flash-exp\",\n", + " system_prompt=SYSTEM_PROMPT,\n", + " generation_config={\n", + " \"response_mime_type\": \"application/json\",\n", + " },\n", + ")\n", + "model.n = 0" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-29 09:01:53.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:01:53.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf to HAI_AI-Index-Report-2024.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:01:53.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "artifact.metadata: 0%| | 0.00/1.63k [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-01-29 
09:04:06.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:04:06.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 56 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:05:32.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:05:32.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 137 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-01-29 09:05:49.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:05:49.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 199 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/7 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-01-29 09:07:11.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:07:11.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 44 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:08:24.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:08:24.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 143 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:08:37.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:08:37.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 364 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/12 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:09:53.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:09:53.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 1803 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/57 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", + "\u001b[32m2025-01-29 09:11:47.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689 to ?uri=OJ:L_202401689.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:11:47.506\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 754 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/24 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:13:14.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:13:14.053\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 17 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/1 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:15:41.467\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 09:15:41.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 48 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/2 [00:00Discard any remaining, face-up Island cards and reveal new ones.\n", + " >Pass the First player marker to \u001b[0m\n", + "\u001b[32m2025-01-29 09:15:53.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-29 09:15:53.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-29 09:16:53.816\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-29 09:16:53.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m>There is no limit to the number, type, or order of \n", + "actions a player may take during the Action pha\u001b[0m\n", + "\u001b[32m2025-01-29 09:16:55.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-29 09:16:55.327\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-01-29 09:16:55.356\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mRations are needed for the long journey!\n", + "A player can choose to Pillage a selected Island card with\u001b[0m\n", + "\u001b[32m2025-01-29 09:16:57.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", + "\"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-29 09:16:57.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-01-29 09:16:57.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mEach \n", + "action draws the clans closer to becoming the greatest empire! The \n", + "game ends in the same roun\u001b[0m\n", + "\u001b[32m2025-01-29 09:16:58.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", + " \"answer\": \"The document says that players gain VPs from Pillaging Islands but not how many victory points each conquered island gives\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-29 09:16:58.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-29 09:16:58.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mGAME FLOW\n", + "Note for Imperial Settlers fans \n", + "You cannot Spend 2 Workers \n", + "to get a Resource or a card.\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:00.400\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:00.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:00.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mIMPORT ANT: Some Field Locations work only as upgrades. These Fields have \n", + "the Resources on the righ\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:01.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", + " \"answer\": \"1\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:01.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:01.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNOTE 2: Some abilities in the \n", + "game have a ‘/’ divider between \n", + "presented choices. 
This should be \n", + "t\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:04.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", + "\"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:04.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:04.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mThus allowing a player to play \n", + "a single Boost card or build a single Field Location before resolvin\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:07.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:07.537\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:07.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", + "(Gold tokens assigned to cards are\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:09.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:09.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 
30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:09.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", + "(Gold tokens assigned to cards are\u001b[0m\n", + "\u001b[32m2025-01-29 09:17:12.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-01-24 11:19:51.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " 
\"answer\": 4\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:51.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:51.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mNOTE 1: There’s no limit to the number of cards a player may have \n", - "in their hand. \n", - "NOTE 2: If the\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:52.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:52.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:52.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mStarting with the First player and continuing clockwise, \n", - "each player performs one action at a time.\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:53.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:53.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:53.975\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m>There is no limit to the number, type, or order of \n", - "actions a player may take during the Action pha\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:55.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:55.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:55.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m>Discard any remaining, face-up Island cards and reveal new ones.\n", - " >Pass the First player marker to \u001b[0m\n", - "\u001b[32m2025-01-24 11:19:57.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": 25\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:57.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:57.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mGAME FLOW\n", - "Note for Imperial Settlers fans \n", - "You cannot Spend 2 Workers \n", - "to get a Resource or a card.\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:58.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - 
"}\u001b[0m\n", - "\u001b[32m2025-01-24 11:19:58.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-24 11:20:58.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-01-24 11:20:58.752\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mIMPORT ANT: Some Field Locations work only as upgrades. These Fields have \n", - "the Resources on the righ\u001b[0m\n", - "\u001b[32m2025-01-24 11:21:00.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - "\"answer\": 1\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:21:00.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-01-24 11:21:00.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1mThus allowing a player to play \n", - "a single Boost card or build a single Field Location before resolvin\u001b[0m\n", - "\u001b[32m2025-01-24 11:21:01.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:21:01.855\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - 
"\u001b[32m2025-01-24 11:21:01.872\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", - "(Gold tokens assigned to cards are\u001b[0m\n", - "\u001b[32m2025-01-24 11:21:07.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-24 11:21:07.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-24 11:21:07.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", - "(Gold tokens assigned to cards are\u001b[0m\n", - "\u001b[32m2025-01-24 11:21:08.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "for document_link, document_data in data.groupby(\"document\"):\n", - " logger.info(f\"Downloading document {document_link}\")\n", - " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " if not Path(downloaded_document).exists():\n", - " urlretrieve(document_link, downloaded_document)\n", - " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", - " else:\n", - " 
logger.info(f\"File {downloaded_document} already exists\")\n", - "\n", - " answers, sections = process_document(downloaded_document, document_data, model)\n", - "\n", - " for index in document_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 262 + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 269 + }, + "id": "mltqL7Bhq3m1", + "outputId": "9fc0b64a-2b6c-4e05-9165-5b6b5bf52508" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 document \\\n", + "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "44 44 https://arxiv.org/pdf/2201.11903 \n", + "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", + "66 66 https://github.com/mozilla-ai/structured-qa/re... \n", + "83 83 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "\n", + " section \\\n", + "22 Classification of general-purpose AI models as... \n", + "44 3.2 Results \n", + "47 CARD AND TILE EFFECTS \n", + "52 CARD AND TILE COSTS \n", + "55 CARD AND TILE EFFECTS \n", + "66 EXPEDITION PHASE \n", + "83 15.3. API Fundamentals \n", + "\n", + " question answer \\\n", + "22 What is the threshold, measured in floating po... C \n", + "44 How many random samples were examined to under... 100 \n", + "47 How many different races are there? 6 \n", + "52 Can a player pay coins to compensate for missi... YES \n", + "55 Which type of cards provide coins? -A: Gray -B... B \n", + "66 How many victory points you get from each conq... 
1 \n", + "83 When are virtual addresses assigned to graph a... C \n", + "\n", + " pred_answer pred_section \n", + "22 NO SPECIFIC THRESHOLD IS MENTIONED IN THE DOCU... NaN \n", + "44 50 NaN \n", + "47 7 NaN \n", + "52 NO NaN \n", + "55 NONE OF THE ABOVE NaN \n", + "66 THE DOCUMENT SAYS THAT PLAYERS GAIN VPS FROM P... NaN \n", + "83 A NaN " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Classification of general-purpose AI models as...What is the threshold, measured in floating po...CNO SPECIFIC THRESHOLD IS MENTIONED IN THE DOCU...NaN
4444https://arxiv.org/pdf/2201.119033.2 ResultsHow many random samples were examined to under...10050NaN
4747https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?67NaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
5555https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSWhich type of cards provide coins? -A: Gray -B...BNONE OF THE ABOVENaN
6666https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEHow many victory points you get from each conq...1THE DOCUMENT SAYS THAT PLAYERS GAIN VPS FROM P...NaN
8383https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...15.3. API FundamentalsWhen are virtual addresses assigned to graph a...CANaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"results\",\n \"rows\": 7,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 18,\n \"min\": 22,\n \"max\": 83,\n \"num_unique_values\": 7,\n \"samples\": [\n 22,\n 44,\n 66\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Classification of general-purpose AI models as general-purpose AI models with systemic risk\",\n \"3.2 Results\",\n \"15.3. API Fundamentals\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\",\n \"How many random samples were examined to understand model performance?\",\n \"How many victory points you get from each conquered island?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"C\",\n \"100\",\n \"1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"NO SPECIFIC THRESHOLD IS MENTIONED IN THE DOCUMENT.\",\n \"50\",\n \"THE DOCUMENT SAYS THAT PLAYERS GAIN VPS FROM PILLAGING ISLANDS BUT NOT HOW MANY VICTORY POINTS EACH CONQUERED ISLAND GIVES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] }, - "id": "mltqL7Bhq3m1", - "outputId": "54479b50-365a-4f5c-a06e-b6de90b773b6" - }, - "outputs": [ { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 16,\n \"min\": 12,\n \"max\": 50,\n \"num_unique_values\": 4,\n \"samples\": [\n 22,\n 50,\n 12\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"https://arxiv.org/pdf/2210.05189\",\n 
\"https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Prohibited AI Practices (Article 5)\",\n \"CHAPTER OVERVIEW\",\n \"2.1 Fully Connected Networks\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Which type of AI systems are banned by the AI Act? -A: High-risk systems, -B: Manipulative systems, -C: Real-time biometric systems in public spaces\",\n \"Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\",\n \"Does the model use Sigmoid activation function?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"NO\",\n \"C\",\n \"A\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"B\",\n \"C\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "c4z9XxXWq3m1", + "outputId": "327b1131-3ed5-40ff-f68d-48b0727873c2" }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1212https://arxiv.org/pdf/2210.051892.1 Fully Connected NetworksDoes the model use Sigmoid activation function?NOYESNaN
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Prohibited AI Practices (Article 5)Which type of AI systems are banned by the AI ...CBNaN
2424https://eur-lex.europa.eu/legal-content/EN/TXT...Classification rules (article 51)What is the threshold, measured in floating po...CNOT APPLICABLENaN
5050https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWWhich player begins the game? -A: Sauron -B: T...ACNaN
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9292929292929293" + ] + }, + "metadata": {}, + "execution_count": 14 + } ], - "text/plain": [ - " Unnamed: 0 document \\\n", - "12 12 https://arxiv.org/pdf/2210.05189 \n", - "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", - "24 24 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", - "50 50 https://github.com/mozilla-ai/structured-qa/re... \n", - "\n", - " section \\\n", - "12 2.1 Fully Connected Networks \n", - "22 Prohibited AI Practices (Article 5) \n", - "24 Classification rules (article 51) \n", - "50 CHAPTER OVERVIEW \n", - "\n", - " question answer pred_answer \\\n", - "12 Does the model use Sigmoid activation function? NO YES \n", - "22 Which type of AI systems are banned by the AI ... C B \n", - "24 What is the threshold, measured in floating po... C NOT APPLICABLE \n", - "50 Which player begins the game? -A: Sauron -B: T... A C \n", - "\n", - " pred_section \n", - "12 NaN \n", - "22 NaN \n", - "24 NaN \n", - "50 NaN " + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "UXg_TC7R28QI" + }, + "outputs": [], + "source": [] } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": { + ], + "metadata": { + "accelerator": "GPU", "colab": { - "base_uri": "https://localhost:8080/" + "gpuType": "T4", + "provenance": [] }, - "id": "c4z9XxXWq3m1", - "outputId": "6acb2a06-aaa7-460f-b6cd-6b7bf87aa24e" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9473684210526315" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" + "kernelspec": { + 
"display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "20d67e8902244d87ad72120b9fb71284": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1e7fcaa0156d4f09af4bf24a8607e787", + "IPY_MODEL_0bad96f6403c4042a9ed7bb491c1b25d", + "IPY_MODEL_1c9e0ff5abab4e378959f47c5655f9f7" + ], + "layout": "IPY_MODEL_dd0ddf2594eb42b4babe6eeaf6a59bbb" + } + }, + "1e7fcaa0156d4f09af4bf24a8607e787": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5e532f20ae6d4a5c90d5beba1518d3ee", + "placeholder": "​", + "style": "IPY_MODEL_50215024305b41c38aec0a3808b3bc84", + "value": "artifact.metadata: 100%" + } + }, + "0bad96f6403c4042a9ed7bb491c1b25d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + 
"bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a42220b511f14cd8b89f5071c0d216a4", + "max": 1633, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_46097609bd4b46fa94c27a5dcfe98a1a", + "value": 1633 + } + }, + "1c9e0ff5abab4e378959f47c5655f9f7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e3084de2764a48089029ffafa1087e8a", + "placeholder": "​", + "style": "IPY_MODEL_420798f709e2420d81d7223c34ca442e", + "value": " 1.63k/1.63k [00:00<00:00, 72.1kB/s]" + } + }, + "dd0ddf2594eb42b4babe6eeaf6a59bbb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5e532f20ae6d4a5c90d5beba1518d3ee": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "50215024305b41c38aec0a3808b3bc84": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a42220b511f14cd8b89f5071c0d216a4": { + "model_module": "@jupyter-widgets/base", + 
"model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "46097609bd4b46fa94c27a5dcfe98a1a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e3084de2764a48089029ffafa1087e8a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "420798f709e2420d81d7223c34ca442e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8209dde69d4147739c522342bfedcccd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_066c98c9848e4e00b68d0e98ec6f3c1f", + "IPY_MODEL_c88515f583bd469ca0d6ca54a812ca14", + "IPY_MODEL_a47e31ce610b4dcf8ac934ec11aefc65" + ], + "layout": 
"IPY_MODEL_38bd9b6cec8f42f1a9b2caca71478f4b" + } + }, + "066c98c9848e4e00b68d0e98ec6f3c1f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c8939bbe84c24ff8ad43c8d996d29af2", + "placeholder": "​", + "style": "IPY_MODEL_9a8317a8c8754d4d8b513a7fb0366c8d", + "value": "config.json: 100%" + } + }, + "c88515f583bd469ca0d6ca54a812ca14": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ea0ed18363ec4a86b0383e0b43d38ac7", + "max": 743, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_dfb0d5f3c9ae46dc910d335a9215521a", + "value": 743 + } + }, + "a47e31ce610b4dcf8ac934ec11aefc65": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a2a6f8043e9943c7a6ec9112ac3d33bd", + 
"placeholder": "​", + "style": "IPY_MODEL_8d18d3f17569471fade4a2df380a245c", + "value": " 743/743 [00:00<00:00, 64.1kB/s]" + } + }, + "38bd9b6cec8f42f1a9b2caca71478f4b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c8939bbe84c24ff8ad43c8d996d29af2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9a8317a8c8754d4d8b513a7fb0366c8d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ea0ed18363ec4a86b0383e0b43d38ac7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": 
null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dfb0d5f3c9ae46dc910d335a9215521a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a2a6f8043e9943c7a6ec9112ac3d33bd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": 
null, + "top": null, + "visibility": null, + "width": null + } + }, + "8d18d3f17569471fade4a2df380a245c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "29d523b694174b7596944eeb86a553d0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_73d7ade0b58b41d1b1ac109026eeabc8", + "IPY_MODEL_cc1c0fcd84b94a199612c3e7ccd906cd", + "IPY_MODEL_5dbe5cc9d6e64e5cb62e7018a42e1f8e" + ], + "layout": "IPY_MODEL_56de5716ee0146158e399759aef55c41" + } + }, + "73d7ade0b58b41d1b1ac109026eeabc8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7f05356467fa4c2ab321004efa06e9c9", + "placeholder": "​", + "style": "IPY_MODEL_ed08c56e20194dbca6732642fb4af466", + "value": "model.safetensors: 100%" + } + }, + "cc1c0fcd84b94a199612c3e7ccd906cd": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_266e8497e8b04e3fad5d23391960ed13", + "max": 438349816, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0bebf69871bb4d04a5329ecb32d64b06", + "value": 438349816 + } + }, + "5dbe5cc9d6e64e5cb62e7018a42e1f8e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_be5d5dcca8cb498d8eb982b1cc1273fc", + "placeholder": "​", + "style": "IPY_MODEL_f6e97fcb881443beaec839bd64530d2d", + "value": " 438M/438M [00:02<00:00, 248MB/s]" + } + }, + "56de5716ee0146158e399759aef55c41": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7f05356467fa4c2ab321004efa06e9c9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ed08c56e20194dbca6732642fb4af466": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "266e8497e8b04e3fad5d23391960ed13": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0bebf69871bb4d04a5329ecb32d64b06": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + 
"description_width": "" + } + }, + "be5d5dcca8cb498d8eb982b1cc1273fc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f6e97fcb881443beaec839bd64530d2d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d1b166882cef441c816a75b784b3dcb0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5f145f7ffcd540149cd775f01e3da418", + "IPY_MODEL_6766b3d159fd4c29b853f3ad44616429", + "IPY_MODEL_e3ec24ca9f384b6e8a6b25f66c9a2872" + ], + "layout": "IPY_MODEL_5b034562b2354e70a27bc06f5fe674cd" + } + }, + "5f145f7ffcd540149cd775f01e3da418": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f76cfc2d293d4b409e4fc8bfa805af96", + "placeholder": "​", + "style": "IPY_MODEL_167d14dc1f3b42fe9f4d9cc2ec341363", + "value": "tokenizer_config.json: 100%" + } + }, + "6766b3d159fd4c29b853f3ad44616429": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3919381f1ae247219c7e4378a5d2e1ff", + "max": 405, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2a59d91e7621422ebda4fefca0ee6760", + "value": 405 + } + }, + "e3ec24ca9f384b6e8a6b25f66c9a2872": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + 
"state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d44706bfc8494edc8f266d3a94ff16a2", + "placeholder": "​", + "style": "IPY_MODEL_13dd434100e747588f8be140f55305a3", + "value": " 405/405 [00:00<00:00, 30.5kB/s]" + } + }, + "5b034562b2354e70a27bc06f5fe674cd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f76cfc2d293d4b409e4fc8bfa805af96": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "167d14dc1f3b42fe9f4d9cc2ec341363": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3919381f1ae247219c7e4378a5d2e1ff": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + 
"bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2a59d91e7621422ebda4fefca0ee6760": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d44706bfc8494edc8f266d3a94ff16a2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "13dd434100e747588f8be140f55305a3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "81a8270d87ef4c9b80c46c5236c8292f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7d4aa0529fb74e81a08cc12aeb243456", + "IPY_MODEL_67f985db0d7b41f7b15f135d6acb039e", + "IPY_MODEL_6da62a5ad31940329f00748ad6eab4da" + ], + "layout": "IPY_MODEL_27ec9d176d11451bb049b62c278a86ff" + } + }, + "7d4aa0529fb74e81a08cc12aeb243456": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4e15263fae0140299c6a55ce95f7bd43", + "placeholder": "​", + "style": "IPY_MODEL_cacdb3a3a0e04ca3b744fb82a3dcc925", + "value": "vocab.txt: 100%" + } + }, + "67f985db0d7b41f7b15f135d6acb039e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_97d1348ebec44687ac2a9151d52b1e8f", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ae77818599cd4bc2ac761865e81c3f15", + "value": 231508 + } + }, + "6da62a5ad31940329f00748ad6eab4da": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_44b4d7daccdb46f19db7675c3a7d4f49", + "placeholder": "​", + "style": "IPY_MODEL_ee9dca0e0f2c49a49fb50b623818cda9", + "value": " 232k/232k [00:00<00:00, 1.77MB/s]" + } + }, + "27ec9d176d11451bb049b62c278a86ff": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, 
+ "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e15263fae0140299c6a55ce95f7bd43": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cacdb3a3a0e04ca3b744fb82a3dcc925": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "97d1348ebec44687ac2a9151d52b1e8f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae77818599cd4bc2ac761865e81c3f15": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "44b4d7daccdb46f19db7675c3a7d4f49": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ee9dca0e0f2c49a49fb50b623818cda9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3bd3d79c0262467296061f64606e57ce": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_024598891b4f46299dc20b5cfd714e0c", + "IPY_MODEL_9846ac95a9864f6aad40bffcd1595c48", + "IPY_MODEL_f7e1a279ca7a4576a67d600c6e0fcad6" + ], + "layout": "IPY_MODEL_1eda4198a078469dbba236c3ed8654c3" + } + }, + "024598891b4f46299dc20b5cfd714e0c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_45ce30572c20425691ebdabe0696b0ec", + "placeholder": "​", + "style": "IPY_MODEL_667241a7a4e6442b9e32450dbcbb0f56", + "value": "tokenizer.json: 100%" + } + }, + "9846ac95a9864f6aad40bffcd1595c48": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_11c36278698f4a6e8f606811eaff2166", + "max": 466081, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c8a050cfb1164c1cbecb0a86bc555d9c", + "value": 466081 + } + }, + "f7e1a279ca7a4576a67d600c6e0fcad6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_004ad74940344b6eb376ae4cfc85f26b", + "placeholder": "​", + "style": "IPY_MODEL_cb69dbb882694ed3bab1a2b35e0df524", + "value": " 466k/466k [00:00<00:00, 3.50MB/s]" + } + }, + "1eda4198a078469dbba236c3ed8654c3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + 
"overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "45ce30572c20425691ebdabe0696b0ec": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "667241a7a4e6442b9e32450dbcbb0f56": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "11c36278698f4a6e8f606811eaff2166": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + 
"model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c8a050cfb1164c1cbecb0a86bc555d9c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "004ad74940344b6eb376ae4cfc85f26b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cb69dbb882694ed3bab1a2b35e0df524": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "54af3da7793c404fa8b4e1062185ea68": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_24ae74e4073749fba785b660dac48f4c", + "IPY_MODEL_895f37ac364f4c1aa4b3089fa286fca3", + "IPY_MODEL_f63e1751a94246888bf0426a2288cb36" + ], + "layout": 
"IPY_MODEL_90076a55ec674636b93c7b1d741ea374" + } + }, + "24ae74e4073749fba785b660dac48f4c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_944a78e6adaf4e3a87551d0bd5a6fc75", + "placeholder": "​", + "style": "IPY_MODEL_8d7d0da8d2344625aeef3d1c452a9c68", + "value": "special_tokens_map.json: 100%" + } + }, + "895f37ac364f4c1aa4b3089fa286fca3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_747558448b5e40038b270a6a6f6af6f0", + "max": 112, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_81ee5fe4f8044ab9819b9f767c41826e", + "value": 112 + } + }, + "f63e1751a94246888bf0426a2288cb36": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4f1165cdc7ef4701889d0e6de6ac9ed1", 
+ "placeholder": "​", + "style": "IPY_MODEL_1601603b8da04598b2a3b1b6532b9de9", + "value": " 112/112 [00:00<00:00, 8.30kB/s]" + } + }, + "90076a55ec674636b93c7b1d741ea374": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "944a78e6adaf4e3a87551d0bd5a6fc75": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8d7d0da8d2344625aeef3d1c452a9c68": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "747558448b5e40038b270a6a6f6af6f0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": 
null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "81ee5fe4f8044ab9819b9f767c41826e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "4f1165cdc7ef4701889d0e6de6ac9ed1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": 
null, + "top": null, + "visibility": null, + "width": null + } + }, + "1601603b8da04598b2a3b1b6532b9de9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } } - ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UXg_TC7R28QI" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" }, - "language_info": { - "name": "python", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 0 + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index 73a76f0..d3c8e6d 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -41,11 +41,10 @@ "execution_count": 1, "metadata": { "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 + "base_uri": "https://localhost:8080/" }, "id": "QrgOGtuGlyhT", - "outputId": "62d28154-d186-4417-b032-6701fd174ecd" + "outputId": "9ef0b54f-0c53-46ae-b242-c38114be8e1d" }, "outputs": [ { @@ -53,235 +52,101 @@ "output_type": "stream", "text": [ "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-e3shdxjv\n", - " Running command git clone --filter=blob:none --quiet 
https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-e3shdxjv\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-evgza823\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-evgza823\n", " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", " Switched to a new branch '5-add-benchmark'\n", " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit d125b79bb7bfdeab751f93bac37039950fe24ce5\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit ae325d3fd34f87be6ec8ca17d9b56a9a96c983fd\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting fire (from structured-qa==0.3.3.dev56+gd125b79)\n", + "Collecting fire (from structured-qa==0.3.3.dev71+gae325d3)\n", " Downloading fire-0.7.0.tar.gz (87 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (0.27.1)\n", - "Collecting llama-cpp-python (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading llama_cpp_python-0.3.6.tar.gz (66.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Installing build dependencies ... 
\u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting loguru (from structured-qa==0.3.3.dev56+gd125b79)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.27.1)\n", + "Collecting loguru (from structured-qa==0.3.3.dev71+gae325d3)\n", " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (2.10.5)\n", - "Collecting pymupdf4llm (from structured-qa==0.3.3.dev56+gd125b79)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (2.10.5)\n", + "Collecting pymupdf4llm (from structured-qa==0.3.3.dev71+gae325d3)\n", " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev56+gd125b79) (6.0.2)\n", - "Collecting streamlit (from structured-qa==0.3.3.dev56+gd125b79)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (6.0.2)\n", + "Collecting streamlit (from structured-qa==0.3.3.dev71+gae325d3)\n", " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", - "Collecting unsloth (from structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading unsloth-2025.1.6-py3-none-any.whl.metadata (53 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.7/53.7 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: termcolor in 
/usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev56+gd125b79) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.16.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (4.12.2)\n", - "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (1.26.4)\n", - "Collecting diskcache>=5.6.1 (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n", - "Requirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.1.5)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev56+gd125b79) (2.27.2)\n", - "Collecting pymupdf>=1.24.10 (from 
pymupdf4llm->structured-qa==0.3.3.dev56+gd125b79)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev71+gae325d3) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev71+gae325d3)\n", " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.5.0)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (8.1.8)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.25.5)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.10.2)\n", - "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.25.5)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev71+gae325d3)\n", " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.1.44)\n", - "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev71+gae325d3)\n", " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev56+gd125b79) (6.3.3)\n", - "Collecting unsloth_zoo>=2025.1.4 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading unsloth_zoo-2025.1.5-py3-none-any.whl.metadata (16 kB)\n", - "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.5.1+cu121)\n", - "Collecting xformers>=0.0.27.post2 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n", - "Collecting bitsandbytes (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)\n", - "Requirement already satisfied: triton>=3.0.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.1.0)\n", - "Collecting tyro (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading tyro-0.9.12-py3-none-any.whl.metadata (9.4 kB)\n", - "Requirement already satisfied: transformers!=4.47.0,>=4.46.1 in /usr/local/lib/python3.11/dist-packages (from 
unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.47.1)\n", - "Collecting datasets>=2.16.0 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", - "Requirement already satisfied: sentencepiece>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.0)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (5.9.5)\n", - "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.45.1)\n", - "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.2.1)\n", - "Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading trl-0.13.0-py3-none-any.whl.metadata (11 kB)\n", - "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.14.0)\n", - "Collecting protobuf<6,>=3.20 (from streamlit->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)\n", - "Collecting hf_transfer (from unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n", - "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from accelerate>=0.34.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.5.2)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages 
(from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.21.1)\n", - "Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", - "Collecting xxhash (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", - "Collecting multiprocess<0.70.17 (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", - "Collecting fsspec>=2023.5.0 (from huggingface-hub->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.11.11)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (4.0.12)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2>=2.11.3->llama-cpp-python->structured-qa==0.3.3.dev56+gd125b79) (3.0.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from 
requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev56+gd125b79) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2.18.0)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (3.4.2)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (9.1.0.70)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in 
/usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.3.1)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (11.0.2.54)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.21.5)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.1.105)\n", - "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.13.1)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (12.6.85)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.4.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.0)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from 
transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2024.11.6)\n", - "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.21.0)\n", - "Collecting cut_cross_entropy (from unsloth_zoo>=2025.1.4->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)\n", - "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.16)\n", - "Collecting shtab>=1.5.6 (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79)\n", - " Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n", - "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth->structured-qa==0.3.3.dev56+gd125b79) (4.4.1)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (2.4.4)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.3.2)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (24.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.5.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (6.1.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages 
(from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (0.2.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth->structured-qa==0.3.3.dev56+gd125b79) (1.18.3)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev56+gd125b79) (5.0.2)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.35.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev56+gd125b79) (1.17.0)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.23.0)\n", + "Requirement 
already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.23.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.18.0)\n", + "Requirement already 
satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (24.3.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.36.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.17.0)\n", "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m288.3 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading 
pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", "Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m84.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading unsloth-2025.1.6-py3-none-any.whl (175 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m175.5/175.5 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.1/162.1 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m40.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m89.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 
MB\u001b[0m \u001b[31m57.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m79.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading trl-0.13.0-py3-none-any.whl (293 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.4/293.4 kB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading unsloth_zoo-2025.1.5-py3-none-any.whl (80 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.2/80.2 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m53.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl (15.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.3/15.3 MB\u001b[0m \u001b[31m86.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.1/69.1 MB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 
MB\u001b[0m \u001b[31m83.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tyro-0.9.12-py3-none-any.whl (115 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.7/115.7 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\n", - "Downloading cut_cross_entropy-25.1.1-py3-none-any.whl (22 kB)\n", - "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire, llama-cpp-python\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev56+gd125b79-py3-none-any.whl size=16254 sha256=44ec8b803647a36e38d429c99dca9a41465cabf732b89ee30d835b5bf7ef397a\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-7rr3qwwf/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev71+gae325d3-py3-none-any.whl size=16241 sha256=641ac0cca0f14ec4115b7b865280c1a7e23973690749a2ffed3794756c4dbe0d\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-k7clz5in/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=ae95bde960b4f7822e181432b7e15ac481d7e46da0b22e2e1070da6763218641\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=af951b418ae7e04eab976db91832b61bade5b62b8d6eeb63df48c5254e6bb4ad\n", " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", - " Building wheel for llama-cpp-python (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.6-cp311-cp311-linux_x86_64.whl size=4022116 sha256=7fdfa0bf98f5cf71204e7497d168a59fa164e54a9c7adecfd4bef61dd1277b44\n", - " Stored in directory: /root/.cache/pip/wheels/e8/96/d2/acfb576f7a58ef0580e2fec8096e5eefd17cc356017089337b\n", - "Successfully built structured-qa fire llama-cpp-python\n", - "Installing collected packages: xxhash, watchdog, shtab, pymupdf, protobuf, loguru, hf_transfer, fsspec, fire, diskcache, dill, pymupdf4llm, pydeck, multiprocess, llama-cpp-python, tyro, xformers, datasets, cut_cross_entropy, bitsandbytes, trl, streamlit, unsloth_zoo, unsloth, structured-qa\n", - " Attempting uninstall: protobuf\n", - " Found existing installation: protobuf 4.25.5\n", - " Uninstalling protobuf-4.25.5:\n", - " Successfully uninstalled protobuf-4.25.5\n", - " Attempting uninstall: fsspec\n", - " Found existing installation: fsspec 2024.10.0\n", - " Uninstalling fsspec-2024.10.0:\n", - " Successfully uninstalled fsspec-2024.10.0\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "grpcio-status 1.62.3 requires protobuf>=4.21.6, but you have protobuf 3.20.3 which is incompatible.\n", - "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\n", - "tensorflow-metadata 1.16.1 requires protobuf<6.0.0dev,>=4.25.2; python_version >= \"3.11\", but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed bitsandbytes-0.45.0 cut_cross_entropy-25.1.1 datasets-3.2.0 dill-0.3.8 diskcache-5.6.3 fire-0.7.0 fsspec-2024.9.0 hf_transfer-0.1.9 llama-cpp-python-0.3.6 loguru-0.7.3 multiprocess-0.70.16 protobuf-3.20.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 shtab-1.7.1 streamlit-1.41.1 structured-qa-0.3.3.dev56+gd125b79 trl-0.13.0 tyro-0.9.12 unsloth-2025.1.6 unsloth_zoo-2025.1.5 watchdog-6.0.0 xformers-0.0.29.post1 xxhash-3.5.0\n" + "Successfully built structured-qa fire\n", + "Installing collected packages: watchdog, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", + "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 streamlit-1.41.1 structured-qa-0.3.3.dev71+gae325d3 watchdog-6.0.0\n" ] - }, - { - "data": { - "application/vnd.colab-display-data+json": { - "id": "6b04f797f18f4c54948485ed45a8dacd", - "pip_warning": { - "packages": [ - "google" - ] - } - } - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ @@ -296,23 +161,23 @@ "base_uri": "https://localhost:8080/" }, "id": "S22kTrfPlyhU", - "outputId": "f77e1fb9-837a-4674-85f4-bc4e54815ed0" + "outputId": "ef9530ba-1b0f-4436-cde6-536094af655a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "--2025-01-23 10:00:12-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
185.199.108.133, 185.199.109.133, 185.199.111.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "--2025-01-28 14:06:00-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", - "Length: 14711 (14K) [text/plain]\n", + "Length: 21734 (21K) [text/plain]\n", "Saving to: ‘structured_qa.csv’\n", "\n", - "structured_qa.csv 100%[===================>] 14.37K --.-KB/s in 0.003s \n", + "structured_qa.csv 100%[===================>] 21.22K --.-KB/s in 0.007s \n", "\n", - "2025-01-23 10:00:19 (5.28 MB/s) - ‘structured_qa.csv’ saved [14711/14711]\n", + "2025-01-28 14:06:01 (3.14 MB/s) - ‘structured_qa.csv’ saved [21734/21734]\n", "\n" ] } @@ -332,7 +197,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 3, "metadata": { "id": "iJ812u2llyhV" }, @@ -351,8 +216,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 4, + "metadata": { + "id": "V9vfT0kwOnCI" + }, "outputs": [], "source": [ "from loguru import logger" @@ -369,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 5, "metadata": { "id": "oFU-eYMVlyhX" }, @@ -421,8 +288,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "id": "rrASU0BIOnCI" + }, "outputs": [], "source": [ "from structured_qa.model_loaders import load_gemini_model" @@ -494,431 +363,566 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "AZBwRnfjlyhZ", - 
"outputId": "1cea7c23-9edf-45dc-d58d-09ca5b41d193" + "outputId": "e3713b0b-3791-441f-ba5c-d3897fbe2468" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2025-01-23 10:03:52.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:52.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:52.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:56.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:03:56.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:03.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:06:05.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:05.793\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:07.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf to HAI_AI-Index-Report-2024.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:07.491\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:12.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:06:12.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. 
-C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:27.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Responsible AI\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:07:27.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:33.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.1 Assessing Responsible Al\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:08:33.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. 
-C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:33.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"Chapter 1: Research and Development\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:09:33.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:37.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"1.3 Frontier Al Research\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:10:37.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:47.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"2.2 Language\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:11:47.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-01-28 14:12:50.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"1.3 Frontier Al Research\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:12:50.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:07.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"7.1 Overview of Al Policy in 2023\",\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:14:07.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI 
regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:14.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"7.4 Al Regulation\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:15:14.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:15.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"6.1 Postsecondary CS and Al Education\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:16:15.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:21.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"8.1 Al Postsecondary Education\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:21.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:22.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:22.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:26.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:26.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:32.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"3 Model Architecture\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:03.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:10.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:17:32.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "ERROR:tornado.access:503 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 3442.83ms\n", + "\u001b[32m2025-01-28 14:17:39.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", " \"answer\": 6\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:10.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:14.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:17:39.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - 
\u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:44.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", " \"answer\": 6\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:14.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:21.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:17:44.012\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:49.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"3.2.2 Multi-Head Attention\",\n", " \"answer\": 8\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:21.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:28.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:17:49.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output 
tokens?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:53.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"3.4 Embeddings and Softmax\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:28.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:35.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:17:53.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-28 14:17:58.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"6.2 Model Variations\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:35.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:42.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:17:58.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "ERROR:tornado.access:503 POST 
/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 3822.10ms\n", + "\u001b[32m2025-01-28 14:18:06.467\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"5.2 Hardware and Schedule\",\n", " \"answer\": 8\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:42.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:47.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:18:06.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:12.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"5.2 Hardware and Schedule\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:47.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:51.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:18:12.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:16.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"5.3 Optimizer\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:04:51.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:05:51.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-01-23 10:05:55.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:18:16.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-01-28 14:18:23.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"5.3 Optimizer\",\n", " \"answer\": 4000\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:05:55.985\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:00.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. Training\",\n", + "\u001b[32m2025-01-28 14:18:23.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:19:23.381\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-28 14:19:28.474\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.4 Regularization\",\n", " \"answer\": \"0.1\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:00.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685v2.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:01.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:04.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:04.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense 
layers?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:14.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. OUR METHOD\",\n", + "\u001b[32m2025-01-28 14:19:28.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:19:29.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:19:29.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-28 14:19:33.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:19:33.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-28 14:19:41.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"4 OUR METHOD\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:14.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How much memory is saved (in GB) when training GPT-3 175B with LoRA compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:22.511\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. Practical Benefits and Limitations.\",\n", - " \"answer\": 0.85\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:22.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:30.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:19:41.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-28 14:19:53.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"ABSTRACT\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:30.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:42.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"1. 
INTRODUCTION\",\n", - "\"answer\": \"175\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:42.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:52.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"4.1 Low-Rank-Parametrized Update Matrices\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:52.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:53.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:56.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:06:56.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:04.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3 Arithmetic Reasoning\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:04.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many large language models 
were evaluated?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:13.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:19:53.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:03.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"1 INTRODUCTION\",\n", + " \"answer\": \"175\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:03.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:11.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"4. 
Our method\",\n", + "\"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:11.381\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:11.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:11.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:15.728\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:15.730\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:25.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"3 Arithmetic Reasoning\",\n", + "\"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:25.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:35.232\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", "\"section\": \"3.1 Experimental Setup\",\n", "\"answer\": 5\n", 
"}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:13.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:21.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:20:35.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:43.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", "\"section\": \"3.1 Experimental Setup\",\n", - " \"answer\": 5\n", + "\"answer\": 5\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:21.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:31.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:20:43.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:51.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"5 Symbolic Reasoning\",\n", + " 
\"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:20:51.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:07.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", "\"section\": \"5 Symbolic Reasoning\",\n", - "\"answer\": \"Yes\"\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:07.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:17.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"5 Symbolic Reasoning\",\n", + " \"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:31.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:41.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:21:17.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:26.002\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", "\"section\": \"3.4 Robustness of Chain of Thought\",\n", "\"answer\": 3\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:41.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many random samples for examined to understand model errors?\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:50.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:21:26.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:40.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"3.2 Results\",\n", - " \"answer\": \"50\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:07:50.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:01.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5 Symbolic Reasoning\",\n", - " \"answer\": \"A\"\n", + " \"answer\": \"100\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:40.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:41.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:41.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:45.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:45.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:49.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"2.4. 
Recurrent Networks\",\n", + " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:01.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:01.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:05.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:05.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:09.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:21:49.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:53.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"3. 
Experimental Results\",\n", " \"answer\": 3\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:09.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does the model use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:13.038\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2. Decision Tree Analysis of Neural Networks\",\n", + "\u001b[32m2025-01-28 14:21:53.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-28 14:21:57.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Experimental Results\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:13.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many parameters are in the y = x^2 toy model tree?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:16.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Table 1. 
Computation and memory analysis of toy problems.\",\n", - " \"answer\": \"39\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:16.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:20.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2.4. Recurrent Networks\",\n", - " \"answer\": \"Yes\"\n", + "\u001b[32m2025-01-28 14:21:57.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-01-28 14:22:00.855\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"3. Experimental Results\",\n", + "\"answer\": 14\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:20.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:23.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. 
Experimental Results\",\n", + "\u001b[32m2025-01-28 14:22:00.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-01-28 14:22:05.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"Experimental Results\",\n", " \"answer\": 3\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:23.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:27.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:22:05.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-01-28 14:22:09.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"4. 
Conclusion\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:27.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:28.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:32.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:32.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:44.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"1. 
Introduction\",\n", + "\u001b[32m2025-01-28 14:22:09.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:22:11.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:22:11.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-28 14:22:14.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:22:14.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-28 14:22:32.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. 
How “Open” Do You Want to Make Your Work?\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:44.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:58.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. Where Do You Want to Make Your Work Available?\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:08:58.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is the term of office for members of the advisory board of the Authors Alliance? -A: The source does not specify a term of office for the advisory board. -B: 2 years -C: 4 years\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:09.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"5. Acknowledgements\",\n", + "\u001b[32m2025-01-28 14:22:32.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-28 14:22:53.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", "\"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:09.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:23.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"1. Introduction\",\n", + "\u001b[32m2025-01-28 14:22:53.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-01-28 14:23:07.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"2. Benefits of Open Access\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:23.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:34.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:23:07.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-28 14:23:29.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:34.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In what year did the Bull and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:49.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:23:29.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-01-28 14:23:49.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"3. 
Open Access Policies\",\n", - " \"answer\": 2015\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:09:49.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:01.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Chapter 5\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:01.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:02.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:06.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:20.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2. 
SANITARY HOT WATER INSTALLATIONS\",\n", - " \"answer\": \"B\"\n", + " \"answer\": \"2015\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:23:49.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-01-28 14:24:01.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"Chapter 5\",\n", + "\"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:24:01.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:24:03.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:24:03.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-28 14:24:07.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:24:07.896\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-01-28 14:24:25.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"2. SANITARY HOT WATER INSTALLATIONS\",\n", + "\"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:20.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:37.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:24:25.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-01-28 14:24:41.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"1.2.8. 
GAS DETECTION AND VENTING\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:37.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the daylight factor required for façades with exterior obstructions?\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:53.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:24:41.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-01-28 14:24:54.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"4. VISUAL COMFORT\",\n", - " \"answer\": \"0.7%\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:10:53.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:07.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"1.1.3. 
OCCUPATIONAL SAFETY\",\n", " \"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:07.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:11.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:15.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:15.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of AI systems are banned by the AI Act? -A: High-risk systems, -B: Manipulative systems, -C: Real-time biometric systems in public spaces\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:43.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 5\",\n", + "\u001b[32m2025-01-28 14:24:54.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-01-28 14:25:07.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"I.1.3. 
OCCUPATIONAL SAFETY\",\n", + "\"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:25:07.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:25:08.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:25:08.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-28 14:25:11.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:25:11.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-01-28 14:26:08.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.2. 
Thread Hierarchy\",\n", + " \"answer\": 1024\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:26:08.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-01-28 14:27:06.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"5.2. Thread Hierarchy\",\n", + "\"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:27:06.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-01-28 14:28:04.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"6.1.1. Compilation Workflow\",\n", "\"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:11:43.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? 
-A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:06.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:28:04.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-01-28 14:29:14.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"6.1.1.1 Offline Compilation\",\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:29:14.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-01-28 14:30:17.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"6.1.1.2 Just-in-Time Compilation\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:30:17.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-01-28 14:31:26.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"6.1.1.2 Just-in-Time Compilation\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:31:26.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-01-28 14:32:27.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"15.3. 
API Fundamentals\",\n", + "\"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:32:27.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-01-28 14:33:35.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"15 Graph Memory Nodes\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:33:35.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-01-28 14:34:37.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"15.3. API Fundamentals\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:34:37.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-01-28 14:35:32.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"15.3. API Fundamentals\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:35:32.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:36:32.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-01-28 14:37:40.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"23.1. What is Lazy Loading?\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:37:40.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-01-28 14:38:37.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"23.1. 
What is Lazy Loading?\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:38:37.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", + "\u001b[32m2025-01-28 14:38:39.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689 to ?uri=OJ:L_202401689.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:38:39.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-28 14:38:44.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:38:44.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-28 14:39:10.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"Article 10\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:06.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:26.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:39:10.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "\u001b[32m2025-01-28 14:39:30.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", "\"section\": \"Article 51\",\n", "\"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:26.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:44.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"2. (29)\",\n", + "\u001b[32m2025-01-28 14:39:30.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. 
-C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "\u001b[32m2025-01-28 14:39:52.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"(133)\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:12:44.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:02.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 73\",\n", - "\"answer\": \"B\"\n", + "\u001b[32m2025-01-28 14:39:52.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "\u001b[32m2025-01-28 14:40:13.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"Article 73\",\n", + " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:02.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. 
-C: 12 months\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:20.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:40:13.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-28 14:40:32.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"Article 60\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:20.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:37.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 99\",\n", - " \"answer\": \"A\"\n", + "\u001b[32m2025-01-28 14:40:32.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? 
-A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-28 14:40:52.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 99\",\n", + "\"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:13:37.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:08.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:40:52.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "\u001b[32m2025-01-28 14:41:10.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", "\"section\": \"Article 56\",\n", "\"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:08.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:30.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:41:10.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? -A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "\u001b[32m2025-01-28 14:41:40.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"Article 79\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:14:30.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:15:30.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? 
-A: 5 years -B: 10 years C: 15 years\u001b[0m\n", - "\u001b[32m2025-01-23 10:15:51.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:41:40.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-28 14:41:57.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", "\"section\": \"Article 18\",\n", "\"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:15:51.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? -A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:09.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 65\",\n", - " \"answer\": \"B\"\n", + "\u001b[32m2025-01-28 14:41:57.267\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "\u001b[32m2025-01-28 14:42:17.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"Article 65\",\n", + "\"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:09.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:15.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:18.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:18.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:28.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:42:17.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:42:19.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 
7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:42:19.684\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-28 14:42:23.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:42:23.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-01-28 14:42:32.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"OVERVIEW AND GOAL\",\n", " \"answer\": 3\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:28.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:38.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:42:32.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-01-28 14:42:41.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"END OF THE GAME\",\n", " \"answer\": 3\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:38.081\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:45.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:42:41.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-01-28 14:42:50.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"5. CARD AND TILE EFFECTS\",\n", - " \"answer\": \"6\"\n", + " \"answer\": 6\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:45.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:54.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:42:50.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-28 14:42:58.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"Turn overview\",\n", " \"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:16:54.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:03.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"CHAPTER OVERVIEW\",\n", - " \"answer\": \"No\"\n", + "\u001b[32m2025-01-28 14:42:58.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:43:08.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"Turn overview\",\n", + " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:03.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:13.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"A. 
Take a Chapter card\",\n", + "\u001b[32m2025-01-28 14:43:08.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-01-28 14:43:16.224\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"CHAPTER OVERVIEW\",\n", " \"answer\": \"3\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:13.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:21.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. Chapter Overview\",\n", + "\u001b[32m2025-01-28 14:43:16.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:43:25.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"B. 
Take a Landmark tile\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:21.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:29.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"CARD AND TILE COSTS\",\n", - " \"answer\": \"Yes\"\n", + "\u001b[32m2025-01-28 14:43:25.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-01-28 14:43:34.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"4. 
CARD AND TILE COSTS\",\n", + "\"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:29.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:39.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:43:34.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-01-28 14:43:43.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"4. 
CARD AND TILE COSTS\",\n", " \"answer\": 2\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:17:39.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:39.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:48.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:43:43.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-01-28 14:43:52.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"5. CARD AND TILE EFFECTS\",\n", - " \"answer\": \"No\"\n", + " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:48.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:57.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - "\"section\": \"CARD AND TILE EFFECTS\",\n", - "\"answer\": \"B\"\n", + "\u001b[32m2025-01-28 14:43:52.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:44:52.563\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-01-28 14:45:01.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"CARD AND TILE EFFECTS\",\n", + " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:18:57.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:04.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:45:01.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-01-28 14:45:10.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"5. CARD AND TILE EFFECTS\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:04.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:13.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"CONQUERING MIDDLE-EARTH\",\n", - " \"answer\": \"No\"\n", + "\u001b[32m2025-01-28 14:45:10.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-01-28 14:45:18.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\"section\": \"CONQUERING MIDDLE-EARTH\",\n", + "\"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:13.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:22.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:45:18.981\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "ERROR:tornado.access:503 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 8332.30ms\n", + "\u001b[32m2025-01-28 14:45:36.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"END OF THE GAME\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:22.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:30.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:45:36.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-01-28 14:45:46.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"END OF THE GAME\",\n", " \"answer\": \"7\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:30.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:32.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mUploading 
file\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:36.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:36.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:44.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:45:46.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:45:50.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-28 14:45:50.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-01-28 14:45:53.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-28 14:45:53.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:45:59.826\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"LOOKOUT PHASE\",\n", " \"answer\": 4\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:44.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:51.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"LOOKOUT PHASE\",\n", + "\u001b[32m2025-01-28 14:45:59.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:06.618\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"6. 
GAME FLOW\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:51.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:58.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"ACTION PHASE\",\n", + "\u001b[32m2025-01-28 14:46:06.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:13.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"7. 
GAME FLOW\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:19:58.562\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:05.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:46:13.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:19.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"GAME FLOW\",\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:19.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:25.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"EXPEDITION PHASE\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:05.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:12.009\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME FLOW\",\n", - " \"answer\": 25\n", + "\u001b[32m2025-01-28 14:46:25.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:30.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"7. EXPEDITION PHASE\",\n", + " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:12.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:19.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME FLOW\",\n", + "\u001b[32m2025-01-28 14:46:30.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:37.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"EXPEDITION PHASE\",\n", + " \"answer\": 1\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:37.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:43.218\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"CLEANUP PHASE\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:19.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:27.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:46:43.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:49.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"9. 
ACTIONS\",\n", " \"answer\": \"1\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:27.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:36.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:46:49.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:56.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"section\": \"ACTIONS\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-01-28 14:46:56.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-01-28 14:47:56.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-28 14:48:02.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"ACTIONS\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:36.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a 
tie?\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:43.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:48:02.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-28 14:48:07.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"GAME END\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-23 10:20:43.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-23 10:21:43.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-23 10:21:50.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m30\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-01-28 14:48:07.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-28 14:48:13.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", " \"section\": \"GAME END\",\n", - " \"answer\": \"Yes\"\n", + " \"answer\": \"A\"\n", "}\u001b[0m\n" ] } @@ -966,25 +970,25 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 238 + "height": 175 }, "id": "EYYJgWf6lyha", - "outputId": "347014fe-d6e6-4d0c-e094-dc5fcbed295e" + "outputId": "70ed5703-a6fb-42c9-a2c5-79a5639001db" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25,\n \"min\": 5,\n \"max\": 74,\n \"num_unique_values\": 6,\n \"samples\": [\n 5,\n 13,\n 74\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"3 Model Architecture\",\n \"3 Experimental Results\",\n \"Natural lighting\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Does the final model use learned positional embeddings?\",\n \"How many parameters are in 
the y = x^2 toy model tree?\",\n \"What is the daylight factor required for fa\\u00e7ades with exterior obstructions?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"14\",\n \"0.7\",\n \"850\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"39\",\n \"0.7%\",\n \"0.85\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"6.2 Model Variations\",\n \"Table 1. Computation and memory analysis of toy problems.\",\n \"4. VISUAL COMFORT\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "summary": "{\n \"name\": \"results\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 34,\n \"min\": 5,\n \"max\": 88,\n \"num_unique_values\": 4,\n \"samples\": [\n 49,\n 88,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"CHAPTER OVERVIEW\",\n \"23.1. 
What is Lazy Loading?\",\n \"3.5 Positional Encoding\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Can you take a Chapter card and a Landmark tile on your same turn?\",\n \"Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\",\n \"Does the final model use learned positional embeddings?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"NO\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Turn overview\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe" }, "text/html": [ "\n", - "
\n", + "
\n", "
\n", "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Classification of general-purpose AI models as...What is the threshold, measured in floating po...CNO SPECIFIC THRESHOLD IS MENTIONED IN THE DOCU...NaN
4444https://arxiv.org/pdf/2201.119033.2 ResultsHow many random samples were examined to under...10050NaN
4747https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?67NaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
5555https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSWhich type of cards provide coins? -A: Gray -B...BNONE OF THE ABOVENaN
6666https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEHow many victory points you get from each conq...1THE DOCUMENT SAYS THAT PLAYERS GAIN VPS FROM P...NaN
8383https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...15.3. API FundamentalsWhen are virtual addresses assigned to graph a...CANaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Unnamed: 0 document \\\n", + "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "44 44 https://arxiv.org/pdf/2201.11903 \n", + "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", + "66 66 https://github.com/mozilla-ai/structured-qa/re... \n", + "83 83 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "\n", + " section \\\n", + "22 Classification of general-purpose AI models as... \n", + "44 3.2 Results \n", + "47 CARD AND TILE EFFECTS \n", + "52 CARD AND TILE COSTS \n", + "55 CARD AND TILE EFFECTS \n", + "66 EXPEDITION PHASE \n", + "83 15.3. API Fundamentals \n", + "\n", + " question answer \\\n", + "22 What is the threshold, measured in floating po... C \n", + "44 How many random samples were examined to under... 100 \n", + "47 How many different races are there? 6 \n", + "52 Can a player pay coins to compensate for missi... YES \n", + "55 Which type of cards provide coins? -A: Gray -B... B \n", + "66 How many victory points you get from each conq... 1 \n", + "83 When are virtual addresses assigned to graph a... C \n", + "\n", + " pred_answer pred_section \n", + "22 NO SPECIFIC THRESHOLD IS MENTIONED IN THE DOCU... NaN \n", + "44 50 NaN \n", + "47 7 NaN \n", + "52 NO NaN \n", + "55 NONE OF THE ABOVE NaN \n", + "66 THE DOCUMENT SAYS THAT PLAYERS GAIN VPS FROM P... 
NaN \n", + "83 A NaN " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "c4z9XxXWq3m1", + "outputId": "327b1131-3ed5-40ff-f68d-48b0727873c2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9292929292929293" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "UXg_TC7R28QI" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "004ad74940344b6eb376ae4cfc85f26b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "024598891b4f46299dc20b5cfd714e0c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_45ce30572c20425691ebdabe0696b0ec", + "placeholder": "​", + "style": "IPY_MODEL_667241a7a4e6442b9e32450dbcbb0f56", + "value": "tokenizer.json: 100%" + } + }, + "066c98c9848e4e00b68d0e98ec6f3c1f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c8939bbe84c24ff8ad43c8d996d29af2", + "placeholder": "​", + "style": "IPY_MODEL_9a8317a8c8754d4d8b513a7fb0366c8d", + "value": "config.json: 100%" + } + }, + "0bad96f6403c4042a9ed7bb491c1b25d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a42220b511f14cd8b89f5071c0d216a4", + "max": 1633, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_46097609bd4b46fa94c27a5dcfe98a1a", + "value": 1633 + } + }, + "0bebf69871bb4d04a5329ecb32d64b06": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "11c36278698f4a6e8f606811eaff2166": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "13dd434100e747588f8be140f55305a3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1601603b8da04598b2a3b1b6532b9de9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "167d14dc1f3b42fe9f4d9cc2ec341363": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1c9e0ff5abab4e378959f47c5655f9f7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e3084de2764a48089029ffafa1087e8a", + "placeholder": "​", + "style": "IPY_MODEL_420798f709e2420d81d7223c34ca442e", + "value": " 1.63k/1.63k [00:00<00:00, 72.1kB/s]" + } + }, + "1e7fcaa0156d4f09af4bf24a8607e787": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5e532f20ae6d4a5c90d5beba1518d3ee", + "placeholder": "​", + "style": "IPY_MODEL_50215024305b41c38aec0a3808b3bc84", + "value": "artifact.metadata: 100%" + } + }, + "1eda4198a078469dbba236c3ed8654c3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": 
null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "20d67e8902244d87ad72120b9fb71284": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1e7fcaa0156d4f09af4bf24a8607e787", + "IPY_MODEL_0bad96f6403c4042a9ed7bb491c1b25d", + "IPY_MODEL_1c9e0ff5abab4e378959f47c5655f9f7" + ], + "layout": "IPY_MODEL_dd0ddf2594eb42b4babe6eeaf6a59bbb" + } + }, + "24ae74e4073749fba785b660dac48f4c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_944a78e6adaf4e3a87551d0bd5a6fc75", + "placeholder": "​", + "style": "IPY_MODEL_8d7d0da8d2344625aeef3d1c452a9c68", + "value": "special_tokens_map.json: 100%" + } + }, + "266e8497e8b04e3fad5d23391960ed13": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "27ec9d176d11451bb049b62c278a86ff": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "29d523b694174b7596944eeb86a553d0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_73d7ade0b58b41d1b1ac109026eeabc8", + "IPY_MODEL_cc1c0fcd84b94a199612c3e7ccd906cd", + "IPY_MODEL_5dbe5cc9d6e64e5cb62e7018a42e1f8e" + ], + "layout": "IPY_MODEL_56de5716ee0146158e399759aef55c41" + } + }, + "2a59d91e7621422ebda4fefca0ee6760": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "38bd9b6cec8f42f1a9b2caca71478f4b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + 
"grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3919381f1ae247219c7e4378a5d2e1ff": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3bd3d79c0262467296061f64606e57ce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_024598891b4f46299dc20b5cfd714e0c", + "IPY_MODEL_9846ac95a9864f6aad40bffcd1595c48", + "IPY_MODEL_f7e1a279ca7a4576a67d600c6e0fcad6" + ], + "layout": "IPY_MODEL_1eda4198a078469dbba236c3ed8654c3" + } + }, + "420798f709e2420d81d7223c34ca442e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "44b4d7daccdb46f19db7675c3a7d4f49": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "45ce30572c20425691ebdabe0696b0ec": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "46097609bd4b46fa94c27a5dcfe98a1a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "4e15263fae0140299c6a55ce95f7bd43": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4f1165cdc7ef4701889d0e6de6ac9ed1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "50215024305b41c38aec0a3808b3bc84": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "54af3da7793c404fa8b4e1062185ea68": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_24ae74e4073749fba785b660dac48f4c", + "IPY_MODEL_895f37ac364f4c1aa4b3089fa286fca3", + "IPY_MODEL_f63e1751a94246888bf0426a2288cb36" + ], + "layout": "IPY_MODEL_90076a55ec674636b93c7b1d741ea374" + } + }, + "56de5716ee0146158e399759aef55c41": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", 
+ "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5b034562b2354e70a27bc06f5fe674cd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5dbe5cc9d6e64e5cb62e7018a42e1f8e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_be5d5dcca8cb498d8eb982b1cc1273fc", + "placeholder": "​", + "style": "IPY_MODEL_f6e97fcb881443beaec839bd64530d2d", + "value": " 438M/438M [00:02<00:00, 248MB/s]" + } + }, + "5e532f20ae6d4a5c90d5beba1518d3ee": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + 
} + }, + "5f145f7ffcd540149cd775f01e3da418": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f76cfc2d293d4b409e4fc8bfa805af96", + "placeholder": "​", + "style": "IPY_MODEL_167d14dc1f3b42fe9f4d9cc2ec341363", + "value": "tokenizer_config.json: 100%" + } + }, + "667241a7a4e6442b9e32450dbcbb0f56": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6766b3d159fd4c29b853f3ad44616429": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3919381f1ae247219c7e4378a5d2e1ff", + "max": 405, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2a59d91e7621422ebda4fefca0ee6760", + "value": 405 + } + }, + "67f985db0d7b41f7b15f135d6acb039e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_97d1348ebec44687ac2a9151d52b1e8f", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ae77818599cd4bc2ac761865e81c3f15", + "value": 231508 + } + }, + "6da62a5ad31940329f00748ad6eab4da": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_44b4d7daccdb46f19db7675c3a7d4f49", + "placeholder": "​", + "style": "IPY_MODEL_ee9dca0e0f2c49a49fb50b623818cda9", + "value": " 232k/232k [00:00<00:00, 1.77MB/s]" + } + }, + "73d7ade0b58b41d1b1ac109026eeabc8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7f05356467fa4c2ab321004efa06e9c9", + "placeholder": "​", + "style": "IPY_MODEL_ed08c56e20194dbca6732642fb4af466", + "value": "model.safetensors: 100%" + } + }, + "747558448b5e40038b270a6a6f6af6f0": { + 
"model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7d4aa0529fb74e81a08cc12aeb243456": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4e15263fae0140299c6a55ce95f7bd43", + "placeholder": "​", + "style": "IPY_MODEL_cacdb3a3a0e04ca3b744fb82a3dcc925", + "value": "vocab.txt: 100%" + } + }, + "7f05356467fa4c2ab321004efa06e9c9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": 
"LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "81a8270d87ef4c9b80c46c5236c8292f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7d4aa0529fb74e81a08cc12aeb243456", + "IPY_MODEL_67f985db0d7b41f7b15f135d6acb039e", + "IPY_MODEL_6da62a5ad31940329f00748ad6eab4da" + ], + "layout": "IPY_MODEL_27ec9d176d11451bb049b62c278a86ff" + } + }, + "81ee5fe4f8044ab9819b9f767c41826e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8209dde69d4147739c522342bfedcccd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_066c98c9848e4e00b68d0e98ec6f3c1f", + "IPY_MODEL_c88515f583bd469ca0d6ca54a812ca14", + "IPY_MODEL_a47e31ce610b4dcf8ac934ec11aefc65" + ], + "layout": "IPY_MODEL_38bd9b6cec8f42f1a9b2caca71478f4b" + } + }, + "895f37ac364f4c1aa4b3089fa286fca3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_747558448b5e40038b270a6a6f6af6f0", + "max": 112, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_81ee5fe4f8044ab9819b9f767c41826e", + "value": 112 + } + }, + "8d18d3f17569471fade4a2df380a245c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8d7d0da8d2344625aeef3d1c452a9c68": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "90076a55ec674636b93c7b1d741ea374": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "944a78e6adaf4e3a87551d0bd5a6fc75": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "97d1348ebec44687ac2a9151d52b1e8f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": 
null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9846ac95a9864f6aad40bffcd1595c48": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_11c36278698f4a6e8f606811eaff2166", + "max": 466081, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c8a050cfb1164c1cbecb0a86bc555d9c", + "value": 466081 + } + }, + "9a8317a8c8754d4d8b513a7fb0366c8d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a2a6f8043e9943c7a6ec9112ac3d33bd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a42220b511f14cd8b89f5071c0d216a4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, 
+ "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a47e31ce610b4dcf8ac934ec11aefc65": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a2a6f8043e9943c7a6ec9112ac3d33bd", + "placeholder": "​", + "style": "IPY_MODEL_8d18d3f17569471fade4a2df380a245c", + "value": " 743/743 [00:00<00:00, 64.1kB/s]" + } + }, + "ae77818599cd4bc2ac761865e81c3f15": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "be5d5dcca8cb498d8eb982b1cc1273fc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c88515f583bd469ca0d6ca54a812ca14": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ea0ed18363ec4a86b0383e0b43d38ac7", + "max": 743, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_dfb0d5f3c9ae46dc910d335a9215521a", + "value": 743 + } + }, + "c8939bbe84c24ff8ad43c8d996d29af2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": 
null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c8a050cfb1164c1cbecb0a86bc555d9c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "cacdb3a3a0e04ca3b744fb82a3dcc925": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cb69dbb882694ed3bab1a2b35e0df524": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cc1c0fcd84b94a199612c3e7ccd906cd": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_266e8497e8b04e3fad5d23391960ed13", + "max": 438349816, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0bebf69871bb4d04a5329ecb32d64b06", + "value": 438349816 + } + }, + "d1b166882cef441c816a75b784b3dcb0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5f145f7ffcd540149cd775f01e3da418", + "IPY_MODEL_6766b3d159fd4c29b853f3ad44616429", + "IPY_MODEL_e3ec24ca9f384b6e8a6b25f66c9a2872" + ], + "layout": "IPY_MODEL_5b034562b2354e70a27bc06f5fe674cd" + } + }, + "d44706bfc8494edc8f266d3a94ff16a2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": 
null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dd0ddf2594eb42b4babe6eeaf6a59bbb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dfb0d5f3c9ae46dc910d335a9215521a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e3084de2764a48089029ffafa1087e8a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e3ec24ca9f384b6e8a6b25f66c9a2872": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d44706bfc8494edc8f266d3a94ff16a2", + "placeholder": "​", + "style": "IPY_MODEL_13dd434100e747588f8be140f55305a3", + "value": " 405/405 [00:00<00:00, 30.5kB/s]" + } + }, + "ea0ed18363ec4a86b0383e0b43d38ac7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ed08c56e20194dbca6732642fb4af466": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ee9dca0e0f2c49a49fb50b623818cda9": { + 
"model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f63e1751a94246888bf0426a2288cb36": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4f1165cdc7ef4701889d0e6de6ac9ed1", + "placeholder": "​", + "style": "IPY_MODEL_1601603b8da04598b2a3b1b6532b9de9", + "value": " 112/112 [00:00<00:00, 8.30kB/s]" + } + }, + "f6e97fcb881443beaec839bd64530d2d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f76cfc2d293d4b409e4fc8bfa805af96": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, 
+ "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f7e1a279ca7a4576a67d600c6e0fcad6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_004ad74940344b6eb376ae4cfc85f26b", + "placeholder": "​", + "style": "IPY_MODEL_cb69dbb882694ed3bab1a2b35e0df524", + "value": " 466k/466k [00:00<00:00, 3.50MB/s]" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/benchmark/qwen_2_5_7B_perfect_context.ipynb b/benchmark/qwen_2_5_7B_perfect_context.ipynb index e18825d..e0f5d97 100644 --- a/benchmark/qwen_2_5_7B_perfect_context.ipynb +++ b/benchmark/qwen_2_5_7B_perfect_context.ipynb @@ -48,8 +48,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Cloning into 'structured-qa'...\n", "remote: Enumerating objects: 795, done.\u001b[K\n", @@ -78,8 
+78,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Processing ./structured-qa\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", @@ -154,18 +154,18 @@ ] }, { - "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { + "id": "df14365dea1e4a95896a43fb5764312a", "pip_warning": { "packages": [ "structured_qa" ] - }, - "id": "df14365dea1e4a95896a43fb5764312a" + } } }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -174,27 +174,27 @@ }, { "cell_type": "code", - "source": [ - "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" - ], + "execution_count": 6, "metadata": { - "id": "mZtwFXA5IOvn", - "outputId": "c3b6fd2a-27f5-44e7-b5f6-b05bde51a979", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "mZtwFXA5IOvn", + "outputId": "c3b6fd2a-27f5-44e7-b5f6-b05bde51a979" }, - "execution_count": 6, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h" ] } + ], + "source": [ + "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" ] }, { @@ -247,9 +247,6 @@ }, "outputs": [], "source": [ - "import time\n", - "\n", - "\n", "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", "You are a rigorous assistant answering questions.\n", "You only answer based on the current information available.\n", @@ -330,8 +327,6 @@ "cell_type": "code", "execution_count": 7, 
"metadata": { - "id": "ObsvwlNslyhZ", - "outputId": "f8e3573d-30d7-4f57-d8eb-d430d42e3755", "colab": { "base_uri": "https://localhost:8080/", "height": 173, @@ -348,12 +343,14 @@ "eaa38024ac24497080f3cff4ddc5b39b", "a4935eb54fc2442fa243555f0b572ca0" ] - } + }, + "id": "ObsvwlNslyhZ", + "outputId": "f8e3573d-30d7-4f57-d8eb-d430d42e3755" }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", @@ -364,18 +361,18 @@ ] }, { - "output_type": "display_data", "data": { - "text/plain": [ - "Qwen2.5-7B-Instruct-Q8_0.gguf: 0%| | 0.00/8.10G [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", "\u001b[32m2025-01-29 13:02:00.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", @@ -717,81 +714,11 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Unnamed: 0 document \\\n", - "10 10 https://arxiv.org/pdf/1706.03762 \n", - "14 14 https://arxiv.org/pdf/2210.05189 \n", - "16 16 https://arxiv.org/pdf/2210.05189 \n", - "21 21 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", - "38 38 https://arxiv.org/pdf/2201.11903 \n", - "42 42 https://arxiv.org/pdf/2201.11903 \n", - "43 43 https://arxiv.org/pdf/2201.11903 \n", - "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", - "51 51 https://github.com/mozilla-ai/structured-qa/re... \n", - "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", - "65 65 https://github.com/mozilla-ai/structured-qa/re... \n", - "72 72 https://github.com/mozilla-ai/structured-qa/re... \n", - "76 76 https://commission.europa.eu/document/download... \n", - "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... 
\n", - "82 82 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "85 85 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "\n", - " section \\\n", - "10 5.4 Regularization \n", - "14 3 Experimental Results \n", - "16 3 Experimental Results \n", - "21 Data and data governance \n", - "38 3.1 Experimental Setup \n", - "42 5 Symbolic Reasoning \n", - "43 3.4 Robustness of Chain of Thought \n", - "47 CARD AND TILE EFFECTS \n", - "51 CHAPTER OVERVIEW \n", - "52 CARD AND TILE COSTS \n", - "65 EXPEDITION PHASE \n", - "72 GAME END \n", - "76 1.2.1. Internal partitions and doors \n", - "78 5.2. Thread Hierarchy \n", - "82 6.1.1. Compilation Workflow \n", - "85 15.3. API Fundamentals \n", - "\n", - " question answer \\\n", - "10 What was the dropout rate used for the base mo... 0.1 \n", - "14 How many parameters are in the toy model (y = ... 14 \n", - "16 What is the main computational advantage of de... B \n", - "21 what is a requirement for datasets used in hig... B \n", - "38 How many large language models were evaluated? 5 \n", - "42 Which symbolic reasoning task is used as an ou... A \n", - "43 How many annotators provided independent chain... 3 \n", - "47 How many different races are there? 6 \n", - "51 After taking a landmark tile, do you reveal a ... NO \n", - "52 Can a player pay coins to compensate for missi... YES \n", - "65 Do you need a fish to conquer a distant island? YES \n", - "72 If player 1 has 30 Victory points and 4 worker... A \n", - "76 What fire resistance must vertical partitions ... A \n", - "78 Can you identify a thread with a four-dimensio... NO \n", - "82 What happens to the compiled binary code after... A \n", - "85 When does a graph allocation's lifetime end? -... B \n", - "\n", - " pred_answer pred_section \n", - "10 PDROP = 0.1 NaN \n", - "14 NUMBER\\nQUESTION: HOW MANY PARAMETERS ARE IN T... 
NaN \n", - "16 B: FEWER OPERATIONS NaN \n", - "21 B: DATASETS ENSURING QUALITY AND DIVERSITY NaN \n", - "38 FIVE NaN \n", - "42 A\\nBASED ON THE INFORMATION PROVIDED, THE OUT-... NaN \n", - "43 2 NaN \n", - "47 5 NaN \n", - "51 YES NaN \n", - "52 NO NaN \n", - "65 NO NaN \n", - "72 A: PLAYER 1 NaN \n", - "76 A: EI30 NaN \n", - "78 I NEED MORE INFO. NaN \n", - "82 A: IT IS CACHED FOR LATER USE AND TO AVOID REC... NaN \n", - "85 B: WHEN THE EXECUTION REACHES THE FREEING GRAP... NaN " - ], + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 16,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25,\n \"min\": 10,\n \"max\": 85,\n \"num_unique_values\": 16,\n \"samples\": [\n 10,\n 14,\n 42\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/1706.03762\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"EXPEDITION PHASE\",\n \"1.2.1. Internal partitions and doors\",\n \"5.4 Regularization\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 16,\n \"samples\": [\n \"What was the dropout rate used for the base model?\",\n \"How many parameters are in the toy model (y = x^2) tree?\",\n \"Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"NO\",\n \"14\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"NO\",\n \"A: EI30\",\n \"PDROP = 0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, "text/html": [ "\n", "
\n", @@ -1196,13 +1123,83 @@ "
\n", "
\n" ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"results\",\n \"rows\": 16,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25,\n \"min\": 10,\n \"max\": 85,\n \"num_unique_values\": 16,\n \"samples\": [\n 10,\n 14,\n 42\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/1706.03762\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"EXPEDITION PHASE\",\n \"1.2.1. Internal partitions and doors\",\n \"5.4 Regularization\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 16,\n \"samples\": [\n \"What was the dropout rate used for the base model?\",\n \"How many parameters are in the toy model (y = x^2) tree?\",\n \"Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"NO\",\n \"14\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"NO\",\n \"A: EI30\",\n \"PDROP = 0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "14 14 https://arxiv.org/pdf/2210.05189 \n", + "16 16 https://arxiv.org/pdf/2210.05189 \n", + "21 21 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "38 38 https://arxiv.org/pdf/2201.11903 \n", + "42 42 https://arxiv.org/pdf/2201.11903 \n", + "43 43 https://arxiv.org/pdf/2201.11903 \n", + "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", + "51 51 https://github.com/mozilla-ai/structured-qa/re... \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "65 65 https://github.com/mozilla-ai/structured-qa/re... \n", + "72 72 https://github.com/mozilla-ai/structured-qa/re... \n", + "76 76 https://commission.europa.eu/document/download... \n", + "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "82 82 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "85 85 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... 
\n", + "\n", + " section \\\n", + "10 5.4 Regularization \n", + "14 3 Experimental Results \n", + "16 3 Experimental Results \n", + "21 Data and data governance \n", + "38 3.1 Experimental Setup \n", + "42 5 Symbolic Reasoning \n", + "43 3.4 Robustness of Chain of Thought \n", + "47 CARD AND TILE EFFECTS \n", + "51 CHAPTER OVERVIEW \n", + "52 CARD AND TILE COSTS \n", + "65 EXPEDITION PHASE \n", + "72 GAME END \n", + "76 1.2.1. Internal partitions and doors \n", + "78 5.2. Thread Hierarchy \n", + "82 6.1.1. Compilation Workflow \n", + "85 15.3. API Fundamentals \n", + "\n", + " question answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 \n", + "14 How many parameters are in the toy model (y = ... 14 \n", + "16 What is the main computational advantage of de... B \n", + "21 what is a requirement for datasets used in hig... B \n", + "38 How many large language models were evaluated? 5 \n", + "42 Which symbolic reasoning task is used as an ou... A \n", + "43 How many annotators provided independent chain... 3 \n", + "47 How many different races are there? 6 \n", + "51 After taking a landmark tile, do you reveal a ... NO \n", + "52 Can a player pay coins to compensate for missi... YES \n", + "65 Do you need a fish to conquer a distant island? YES \n", + "72 If player 1 has 30 Victory points and 4 worker... A \n", + "76 What fire resistance must vertical partitions ... A \n", + "78 Can you identify a thread with a four-dimensio... NO \n", + "82 What happens to the compiled binary code after... A \n", + "85 When does a graph allocation's lifetime end? -... B \n", + "\n", + " pred_answer pred_section \n", + "10 PDROP = 0.1 NaN \n", + "14 NUMBER\\nQUESTION: HOW MANY PARAMETERS ARE IN T... NaN \n", + "16 B: FEWER OPERATIONS NaN \n", + "21 B: DATASETS ENSURING QUALITY AND DIVERSITY NaN \n", + "38 FIVE NaN \n", + "42 A\\nBASED ON THE INFORMATION PROVIDED, THE OUT-... 
NaN \n", + "43 2 NaN \n", + "47 5 NaN \n", + "51 YES NaN \n", + "52 NO NaN \n", + "65 NO NaN \n", + "72 A: PLAYER 1 NaN \n", + "76 A: EI30 NaN \n", + "78 I NEED MORE INFO. NaN \n", + "82 A: IT IS CACHED FOR LATER USE AND TO AVOID REC... NaN \n", + "85 B: WHEN THE EXECUTION REACHES THE FREEING GRAP... NaN " + ] }, + "execution_count": 13, "metadata": {}, - "execution_count": 13 + "output_type": "execute_result" } ], "source": [ @@ -1222,14 +1219,14 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "0.8383838383838383" ] }, + "execution_count": 14, "metadata": {}, - "execution_count": 14 + "output_type": "execute_result" } ], "source": [ @@ -1248,9 +1245,10 @@ } ], "metadata": { + "accelerator": "GPU", "colab": { - "provenance": [], - "gpuType": "T4" + "gpuType": "T4", + "provenance": [] }, "kernelspec": { "display_name": "Python 3", @@ -1260,35 +1258,88 @@ "name": "python", "version": "3.10.12" }, - "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { - "92e5c930e73d4fce9d0d79de5b12fbc3": { + "0e89d5adf51e49d6a7a5e720af8c0f3d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1909a0309d9d4293a10da4abca6a4468": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_4fbd860349a44d1faaaa708ee8d81cd0", - "IPY_MODEL_1909a0309d9d4293a10da4abca6a4468", - "IPY_MODEL_47f329f3d9c74758ad6e96ee76036b90" - ], - "layout": "IPY_MODEL_a96b9b499a11466da9b2efc8e55689c3" + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0e89d5adf51e49d6a7a5e720af8c0f3d", + "max": 8098525888, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b4092c4c9e124f6bb3ec25fbda77044a", + "value": 8098525888 } }, - "4fbd860349a44d1faaaa708ee8d81cd0": { + "47f329f3d9c74758ad6e96ee76036b90": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1300,113 +1351,89 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_e1bfae86937042d8a105f04e3ccdb697", + "layout": "IPY_MODEL_eaa38024ac24497080f3cff4ddc5b39b", "placeholder": "​", - "style": "IPY_MODEL_8733a460cd564e5f8b9177c4b37bc1d5", - "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" + 
"style": "IPY_MODEL_a4935eb54fc2442fa243555f0b572ca0", + "value": " 8.10G/8.10G [03:12<00:00, 41.4MB/s]" } }, - "1909a0309d9d4293a10da4abca6a4468": { + "4fbd860349a44d1faaaa708ee8d81cd0": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", + "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_0e89d5adf51e49d6a7a5e720af8c0f3d", - "max": 8098525888, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_b4092c4c9e124f6bb3ec25fbda77044a", - "value": 8098525888 + "layout": "IPY_MODEL_e1bfae86937042d8a105f04e3ccdb697", + "placeholder": "​", + "style": "IPY_MODEL_8733a460cd564e5f8b9177c4b37bc1d5", + "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" } }, - "47f329f3d9c74758ad6e96ee76036b90": { + "8733a460cd564e5f8b9177c4b37bc1d5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "92e5c930e73d4fce9d0d79de5b12fbc3": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "HBoxModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_eaa38024ac24497080f3cff4ddc5b39b", - "placeholder": "​", - "style": "IPY_MODEL_a4935eb54fc2442fa243555f0b572ca0", - "value": " 8.10G/8.10G [03:12<00:00, 41.4MB/s]" + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4fbd860349a44d1faaaa708ee8d81cd0", + "IPY_MODEL_1909a0309d9d4293a10da4abca6a4468", + "IPY_MODEL_47f329f3d9c74758ad6e96ee76036b90" + ], + "layout": "IPY_MODEL_a96b9b499a11466da9b2efc8e55689c3" } }, - "a96b9b499a11466da9b2efc8e55689c3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "a4935eb54fc2442fa243555f0b572ca0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": 
null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_name": "StyleView", + "description_width": "" } }, - "e1bfae86937042d8a105f04e3ccdb697": { + "a96b9b499a11466da9b2efc8e55689c3": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1455,25 +1482,26 @@ "width": null } }, - "8733a460cd564e5f8b9177c4b37bc1d5": { + "b4092c4c9e124f6bb3ec25fbda77044a": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "0e89d5adf51e49d6a7a5e720af8c0f3d": { + "e1bfae86937042d8a105f04e3ccdb697": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1522,26 +1550,10 @@ "width": null } }, - "b4092c4c9e124f6bb3ec25fbda77044a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, 
"eaa38024ac24497080f3cff4ddc5b39b": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1589,25 +1601,10 @@ "visibility": null, "width": null } - }, - "a4935eb54fc2442fa243555f0b572ca0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } } } } }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From 6b0a0c18f16d9c1b8cf8a0bdfe7f7c8ed66426bc Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 30 Jan 2025 13:25:52 +0100 Subject: [PATCH 088/120] Update qwen notebooks --- benchmark/qwen_2_5_7B_RAGatouille.ipynb | 7321 +++++++------------ benchmark/qwen_2_5_7B_perfect_context.ipynb | 1282 ++-- 2 files changed, 3125 insertions(+), 5478 deletions(-) diff --git a/benchmark/qwen_2_5_7B_RAGatouille.ipynb b/benchmark/qwen_2_5_7B_RAGatouille.ipynb index dd586c4..8714014 100644 --- a/benchmark/qwen_2_5_7B_RAGatouille.ipynb +++ b/benchmark/qwen_2_5_7B_RAGatouille.ipynb @@ -1,4864 +1,2595 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Fcx4osZYq3mt" - }, - "source": [ - "# Structured Q&A" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZE32hJKeq3mv" - }, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jDIEL7SNq3mv" - }, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_OwS4mKRq3mv" - }, - "source": [ - "## GPU 
Check" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_FYZaTmnq3mw" - }, - "source": [ - "First, you'll need to enable GPUs for the notebook:\n", - "\n", - "- Navigate to `Edit`→`Notebook Settings`\n", - "- Select T4 GPU from the Hardware Accelerator section\n", - "- Click `Save` and accept.\n", - "\n", - "Next, we'll confirm that we can connect to the GPU:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "4RsETkxfq3mw", - "outputId": "66a5e27f-9b93-402b-cba2-212ac3f83fbd" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GPU is available!\n" - ] - } - ], - "source": [ - "import torch\n", - "\n", - "if not torch.cuda.is_available():\n", - " raise RuntimeError(\"GPU not available\")\n", - "else:\n", - " print(\"GPU is available!\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yEgVEmSQq3mx" - }, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "P1eAychVq3my", - "outputId": "ab1ecc14-7c37-46e6-f3ba-a4be47c4dc31" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting ragatouille\n", - " Downloading ragatouille-0.0.8.post4-py3-none-any.whl.metadata (15 kB)\n", - "Collecting PyPDF2\n", - " Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)\n", - "Collecting colbert-ai==0.2.19 (from ragatouille)\n", - " Downloading colbert-ai-0.2.19.tar.gz (86 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.7/86.7 kB\u001b[0m 
\u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting faiss-cpu<2.0.0,>=1.7.4 (from ragatouille)\n", - " Downloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)\n", - "Collecting fast-pytorch-kmeans==0.2.0.1 (from ragatouille)\n", - " Downloading fast_pytorch_kmeans-0.2.0.1-py3-none-any.whl.metadata (1.1 kB)\n", - "Requirement already satisfied: langchain>=0.1.0 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.15)\n", - "Requirement already satisfied: langchain_core>=0.1.4 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.31)\n", - "Collecting llama-index>=0.7 (from ragatouille)\n", - " Downloading llama_index-0.12.14-py3-none-any.whl.metadata (12 kB)\n", - "Collecting onnx<2.0.0,>=1.15.0 (from ragatouille)\n", - " Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)\n", - "Collecting sentence-transformers<3.0.0,>=2.2.2 (from ragatouille)\n", - " Downloading sentence_transformers-2.7.0-py3-none-any.whl.metadata (11 kB)\n", - "Collecting srsly==2.4.8 (from ragatouille)\n", - " Downloading srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n", - "Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (2.5.1+cu121)\n", - "Requirement already satisfied: transformers<5.0.0,>=4.36.2 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (4.47.1)\n", - "Collecting voyager<3.0.0,>=2.0.2 (from ragatouille)\n", - " Downloading voyager-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.9 kB)\n", - "Collecting bitarray (from colbert-ai==0.2.19->ragatouille)\n", - " Downloading bitarray-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (32 kB)\n", - "Collecting datasets (from 
colbert-ai==0.2.19->ragatouille)\n", - " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", - "Requirement already satisfied: flask in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (3.1.0)\n", - "Collecting git-python (from colbert-ai==0.2.19->ragatouille)\n", - " Downloading git_python-1.0.3-py2.py3-none-any.whl.metadata (331 bytes)\n", - "Collecting python-dotenv (from colbert-ai==0.2.19->ragatouille)\n", - " Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", - "Collecting ninja (from colbert-ai==0.2.19->ragatouille)\n", - " Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.3 kB)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (1.13.1)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (4.67.1)\n", - "Collecting ujson (from colbert-ai==0.2.19->ragatouille)\n", - " Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from fast-pytorch-kmeans==0.2.0.1->ragatouille) (1.26.4)\n", - "Collecting pynvml (from fast-pytorch-kmeans==0.2.0.1->ragatouille)\n", - " Downloading pynvml-12.0.0-py3-none-any.whl.metadata (5.4 kB)\n", - "Requirement already satisfied: catalogue<2.1.0,>=2.0.3 in /usr/local/lib/python3.11/dist-packages (from srsly==2.4.8->ragatouille) (2.0.10)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from faiss-cpu<2.0.0,>=1.7.4->ragatouille) (24.2)\n", - "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (6.0.2)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) 
(2.0.37)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (3.11.11)\n", - "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.3.5)\n", - "Requirement already satisfied: langsmith<0.4,>=0.1.17 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.3.1)\n", - "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.10.6)\n", - "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.32.3)\n", - "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (9.0.0)\n", - "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from langchain_core>=0.1.4->ragatouille) (1.33)\n", - "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.11/dist-packages (from langchain_core>=0.1.4->ragatouille) (4.12.2)\n", - "Collecting llama-index-agent-openai<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_agent_openai-0.4.3-py3-none-any.whl.metadata (727 bytes)\n", - "Collecting llama-index-cli<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_cli-0.4.0-py3-none-any.whl.metadata (1.5 kB)\n", - "Collecting llama-index-core<0.13.0,>=0.12.14 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_core-0.12.14-py3-none-any.whl.metadata (2.5 kB)\n", - "Collecting llama-index-embeddings-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl.metadata (684 bytes)\n", - "Collecting llama-index-indices-managed-llama-cloud>=0.4.0 
(from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl.metadata (3.6 kB)\n", - "Collecting llama-index-llms-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_llms_openai-0.3.14-py3-none-any.whl.metadata (3.3 kB)\n", - "Collecting llama-index-multi-modal-llms-openai<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_multi_modal_llms_openai-0.4.2-py3-none-any.whl.metadata (726 bytes)\n", - "Collecting llama-index-program-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_program_openai-0.3.1-py3-none-any.whl.metadata (764 bytes)\n", - "Collecting llama-index-question-gen-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_question_gen_openai-0.3.0-py3-none-any.whl.metadata (783 bytes)\n", - "Collecting llama-index-readers-file<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_readers_file-0.4.4-py3-none-any.whl.metadata (5.4 kB)\n", - "Collecting llama-index-readers-llama-parse>=0.4.0 (from llama-index>=0.7->ragatouille)\n", - " Downloading llama_index_readers_llama_parse-0.4.0-py3-none-any.whl.metadata (3.6 kB)\n", - "Requirement already satisfied: nltk>3.8.1 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (3.9.1)\n", - "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.11/dist-packages (from onnx<2.0.0,>=1.15.0->ragatouille) (4.25.6)\n", - "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (1.6.1)\n", - "Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (0.27.1)\n", - "Requirement already satisfied: Pillow in /usr/local/lib/python3.11/dist-packages (from 
sentence-transformers<3.0.0,>=2.2.2->ragatouille) (11.1.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.17.0)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.5)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2024.10.0)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (9.1.0.70)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.3.1)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (11.0.2.54)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.0.106)\n", - 
"Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2.21.5)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.1.105)\n", - "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.0)\n", - "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (1.13.1)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.13->ragatouille) (12.8.61)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=1.13->ragatouille) (1.3.0)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (2024.11.6)\n", - "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.21.0)\n", - "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.5.2)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (2.4.4)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.3.2)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (24.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from 
aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.5.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (6.1.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (0.2.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.18.3)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain_core>=0.1.4->ragatouille) (3.0.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.28.1)\n", - "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (3.10.15)\n", - "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.0)\n", - "Requirement already satisfied: zstandard<0.24.0,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.23.0)\n", - "Requirement already satisfied: openai>=1.14.0 in /usr/local/lib/python3.11/dist-packages (from llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.59.9)\n", - "Collecting dataclasses-json (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", - " Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n", - "Requirement already satisfied: deprecated>=1.2.9.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille) (1.2.17)\n", - 
"Collecting dirtyjson<2.0.0,>=1.0.8 (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", - " Downloading dirtyjson-1.0.8-py3-none-any.whl.metadata (11 kB)\n", - "Collecting filetype<2.0.0,>=1.2.0 (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", - " Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)\n", - "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille) (1.6.0)\n", - "Collecting tiktoken>=0.3.3 (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", - " Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", - "Collecting typing-inspect>=0.8.0 (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", - " Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n", - "Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille) (1.17.2)\n", - "Collecting llama-cloud<0.2.0,>=0.1.8 (from llama-index-indices-managed-llama-cloud>=0.4.0->llama-index>=0.7->ragatouille)\n", - " Downloading llama_cloud-0.1.11-py3-none-any.whl.metadata (912 bytes)\n", - "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (4.12.3)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.2.2)\n", - "Collecting pypdf<6.0.0,>=5.1.0 (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille)\n", - " Downloading pypdf-5.2.0-py3-none-any.whl.metadata (7.2 kB)\n", - "Collecting striprtf<0.0.27,>=0.0.26 (from 
llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille)\n", - " Downloading striprtf-0.0.26-py3-none-any.whl.metadata (2.1 kB)\n", - "Collecting llama-parse>=0.5.0 (from llama-index-readers-llama-parse>=0.4.0->llama-index>=0.7->ragatouille)\n", - " Downloading llama_parse-0.5.20-py3-none-any.whl.metadata (6.9 kB)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (8.1.8)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (1.4.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (2.27.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2024.12.14)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from SQLAlchemy<3,>=1.4->langchain>=0.1.0->ragatouille) (3.1.1)\n", - "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets->colbert-ai==0.2.19->ragatouille) (17.0.0)\n", - "Collecting dill<0.3.9,>=0.3.0 (from 
datasets->colbert-ai==0.2.19->ragatouille)\n", - " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", - "Collecting xxhash (from datasets->colbert-ai==0.2.19->ragatouille)\n", - " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", - "Collecting multiprocess<0.70.17 (from datasets->colbert-ai==0.2.19->ragatouille)\n", - " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", - "Collecting fsspec (from torch>=1.13->ragatouille)\n", - " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", - "Requirement already satisfied: Werkzeug>=3.1 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (3.1.3)\n", - "Requirement already satisfied: itsdangerous>=2.2 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (2.2.0)\n", - "Requirement already satisfied: blinker>=1.9 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (1.9.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=1.13->ragatouille) (3.0.2)\n", - "Requirement already satisfied: gitpython in /usr/local/lib/python3.11/dist-packages (from git-python->colbert-ai==0.2.19->ragatouille) (3.1.44)\n", - "Collecting nvidia-ml-py<13.0.0a0,>=12.0.0 (from pynvml->fast-pytorch-kmeans==0.2.0.1->ragatouille)\n", - " Downloading nvidia_ml_py-12.570.86-py3-none-any.whl.metadata (8.7 kB)\n", - "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->sentence-transformers<3.0.0,>=2.2.2->ragatouille) (3.5.0)\n", - "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.11/dist-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.6)\n", - "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from 
httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (3.7.1)\n", - "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.7)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.14.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.9.0)\n", - "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (0.8.2)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.3.1)\n", - "Collecting mypy-extensions>=0.3.0 (from typing-inspect>=0.8.0->llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", - " Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)\n", - "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", - " Downloading marshmallow-3.26.0-py3-none-any.whl.metadata (7.3 kB)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython->git-python->colbert-ai==0.2.19->ragatouille) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from 
pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2025.1)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython->git-python->colbert-ai==0.2.19->ragatouille) (5.0.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.17.0)\n", - "Downloading ragatouille-0.0.8.post4-py3-none-any.whl (41 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading fast_pytorch_kmeans-0.2.0.1-py3-none-any.whl (8.8 kB)\n", - "Downloading srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (490 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m490.9/490.9 kB\u001b[0m \u001b[31m20.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pypdf2-3.0.1-py3-none-any.whl (232 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m232.6/232.6 kB\u001b[0m \u001b[31m24.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.5/27.5 MB\u001b[0m \u001b[31m77.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading llama_index-0.12.14-py3-none-any.whl (6.9 kB)\n", - "Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.0/16.0 MB\u001b[0m \u001b[31m105.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading voyager-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m100.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading llama_index_agent_openai-0.4.3-py3-none-any.whl (13 kB)\n", - "Downloading llama_index_cli-0.4.0-py3-none-any.whl (27 kB)\n", - "Downloading llama_index_core-0.12.14-py3-none-any.whl (1.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m81.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl (6.2 kB)\n", - "Downloading llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl (13 kB)\n", - "Downloading llama_index_llms_openai-0.3.14-py3-none-any.whl (14 kB)\n", - "Downloading llama_index_multi_modal_llms_openai-0.4.2-py3-none-any.whl (5.9 kB)\n", - "Downloading llama_index_program_openai-0.3.1-py3-none-any.whl (5.3 kB)\n", - "Downloading llama_index_question_gen_openai-0.3.0-py3-none-any.whl (2.9 kB)\n", - "Downloading llama_index_readers_file-0.4.4-py3-none-any.whl (39 kB)\n", - "Downloading llama_index_readers_llama_parse-0.4.0-py3-none-any.whl (2.5 kB)\n", - "Downloading bitarray-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (286 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m286.1/286.1 kB\u001b[0m \u001b[31m27.7 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m41.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m20.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading git_python-1.0.3-py2.py3-none-any.whl (1.9 kB)\n", - "Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (422 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m422.9/422.9 kB\u001b[0m \u001b[31m37.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pynvml-12.0.0-py3-none-any.whl (26 kB)\n", - "Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", - "Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading dirtyjson-1.0.8-py3-none-any.whl (25 kB)\n", - "Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n", - "Downloading llama_cloud-0.1.11-py3-none-any.whl (250 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m250.6/250.6 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading llama_parse-0.5.20-py3-none-any.whl (16 kB)\n", - "Downloading multiprocess-0.70.16-py311-none-any.whl 
(143 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m14.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading nvidia_ml_py-12.570.86-py3-none-any.whl (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.4/44.4 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pypdf-5.2.0-py3-none-any.whl (298 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.7/298.7 kB\u001b[0m \u001b[31m29.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading striprtf-0.0.26-py3-none-any.whl (6.9 kB)\n", - "Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m68.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", - "Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n", - "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading marshmallow-3.26.0-py3-none-any.whl (50 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", - "Building wheels for collected packages: colbert-ai\n", - " Building wheel for colbert-ai (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for colbert-ai: filename=colbert_ai-0.2.19-py3-none-any.whl size=114759 sha256=1259c9368306c751f97b0a8a0e9b0b834b62d6afab66dc017b037e175bb4e949\n", - " Stored in directory: /root/.cache/pip/wheels/14/75/5f/9680ae93eb0258ccf3e9d8cd34f328c53f8888c06c37067f3a\n", - "Successfully built colbert-ai\n", - "Installing collected packages: striprtf, nvidia-ml-py, filetype, dirtyjson, bitarray, xxhash, voyager, ujson, srsly, python-dotenv, PyPDF2, pypdf, pynvml, onnx, ninja, mypy-extensions, marshmallow, fsspec, faiss-cpu, dill, typing-inspect, tiktoken, multiprocess, llama-cloud, git-python, dataclasses-json, llama-index-core, fast-pytorch-kmeans, datasets, sentence-transformers, llama-parse, llama-index-readers-file, llama-index-llms-openai, llama-index-indices-managed-llama-cloud, llama-index-embeddings-openai, colbert-ai, llama-index-readers-llama-parse, llama-index-multi-modal-llms-openai, llama-index-cli, llama-index-agent-openai, llama-index-program-openai, llama-index-question-gen-openai, llama-index, ragatouille\n", - " Attempting uninstall: srsly\n", - " Found existing installation: srsly 2.5.1\n", - " Uninstalling srsly-2.5.1:\n", - " Successfully uninstalled srsly-2.5.1\n", - " Attempting uninstall: fsspec\n", - " Found existing installation: fsspec 2024.10.0\n", - " Uninstalling fsspec-2024.10.0:\n", - " Successfully uninstalled fsspec-2024.10.0\n", - " Attempting uninstall: sentence-transformers\n", - " Found existing installation: sentence-transformers 3.3.1\n", - " Uninstalling sentence-transformers-3.3.1:\n", - " Successfully uninstalled sentence-transformers-3.3.1\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed PyPDF2-3.0.1 bitarray-3.0.0 colbert-ai-0.2.19 dataclasses-json-0.6.7 datasets-3.2.0 dill-0.3.8 dirtyjson-1.0.8 faiss-cpu-1.9.0.post1 fast-pytorch-kmeans-0.2.0.1 filetype-1.2.0 fsspec-2024.9.0 git-python-1.0.3 llama-cloud-0.1.11 llama-index-0.12.14 llama-index-agent-openai-0.4.3 llama-index-cli-0.4.0 llama-index-core-0.12.14 llama-index-embeddings-openai-0.3.1 llama-index-indices-managed-llama-cloud-0.6.4 llama-index-llms-openai-0.3.14 llama-index-multi-modal-llms-openai-0.4.2 llama-index-program-openai-0.3.1 llama-index-question-gen-openai-0.3.0 llama-index-readers-file-0.4.4 llama-index-readers-llama-parse-0.4.0 llama-parse-0.5.20 marshmallow-3.26.0 multiprocess-0.70.16 mypy-extensions-1.0.0 ninja-1.11.1.3 nvidia-ml-py-12.570.86 onnx-1.17.0 pynvml-12.0.0 pypdf-5.2.0 python-dotenv-1.0.1 ragatouille-0.0.8.post4 sentence-transformers-2.7.0 srsly-2.4.8 striprtf-0.0.26 tiktoken-0.8.0 typing-inspect-0.9.0 ujson-5.10.0 voyager-2.1.0 xxhash-3.5.0\n" - ] - } - ], - "source": [ - "%pip install ragatouille PyPDF2" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "I0dl5xGnq3my", - "outputId": "395263da-3e7c-469b-ed18-f4e34a8a3774" - }, - "outputs": [ + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-fztvdq23\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-fztvdq23\n", - " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", - " 
Switched to a new branch '5-add-benchmark'\n", - " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 0b8e5cf9d2db91af71478a715bfdbba1b36316fa\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting fire (from structured-qa==0.3.3.dev77+g0b8e5cf)\n", - " Downloading fire-0.7.0.tar.gz (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev77+g0b8e5cf) (0.27.1)\n", - "Collecting loguru (from structured-qa==0.3.3.dev77+g0b8e5cf)\n", - " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev77+g0b8e5cf) (2.10.6)\n", - "Collecting pymupdf4llm (from structured-qa==0.3.3.dev77+g0b8e5cf)\n", - " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev77+g0b8e5cf) (6.0.2)\n", - "Collecting streamlit (from structured-qa==0.3.3.dev77+g0b8e5cf)\n", - " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev77+g0b8e5cf) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) 
(3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (2024.9.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev77+g0b8e5cf) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev77+g0b8e5cf) (2.27.2)\n", - "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev77+g0b8e5cf)\n", - " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (0.10.2)\n", - "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf)\n", - " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (3.1.44)\n", - "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf)\n", - " Downloading 
pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (6.3.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (1.23.0)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (2.3.0)\n", - 
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev77+g0b8e5cf) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (5.0.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (24.3.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (0.36.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in 
/usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev77+g0b8e5cf) (1.17.0)\n", - "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", - "Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m93.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m102.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m41.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m508.7 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", - " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev77+g0b8e5cf-py3-none-any.whl size=16202 sha256=471f9739e08b922697b7117495ad211785a46ff89fe92d96a32d34b93fba365b\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-xj29gks5/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", - " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=aaae19261f87cd2a4893f2b83751a95da7e0b5fae55a2d71054ad33c10ceac02\n", - " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", - "Successfully built structured-qa fire\n", - "Installing collected packages: watchdog, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", - "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 streamlit-1.41.1 structured-qa-0.3.3.dev77+g0b8e5cf watchdog-6.0.0\n" - ] - } - ], - "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Nl_haxghq3mz", - "outputId": "9b2a4855-a3c3-4395-bc32-9a41a9030f36" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2025-01-29 09:01:27-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 21734 (21K) [text/plain]\n", - "Saving to: ‘structured_qa.csv’\n", - "\n", - "structured_qa.csv 100%[===================>] 21.22K --.-KB/s in 0.002s \n", - "\n", - "2025-01-29 09:01:27 (13.3 MB/s) - ‘structured_qa.csv’ saved [21734/21734]\n", - "\n" - ] - } - ], - "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZdWx_e7iq3mz" - }, - "source": [ - "# Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "vGqX_bU5q3mz" - }, - "outputs": [], - "source": [ - "import os\n", - "import google.generativeai as genai\n", - "from google.colab.userdata import get, SecretNotFoundError\n", - "\n", - "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", - "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "cbkIjBYNq3mz" - }, - "outputs": [], - "source": [ - "from loguru import logger" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "BiUeBWnIq3mz" - }, - "outputs": [], - "source": [ - "import PyPDF2\n", - "\n", - "\n", - "def load_pdf(pdf_file: str) -> str | None:\n", - " try:\n", - " pdf_reader = PyPDF2.PdfReader(pdf_file)\n", - " return \"\\n\".join(page.extract_text() for page in pdf_reader.pages)\n", - " except Exception as e:\n", - " logger.exception(e)\n", - " return None" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "z0B2yhFISDgG" - }, - "source": [ - "## Function to Process all questions for a single Document" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "Ilxn8LGFq3m0" - }, - "outputs": [], - "source": [ - "from ragatouille import RAGPretrainedModel\n", - 
"from ragatouille.data import CorpusProcessor\n", - "\n", - "\n", - "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", - "You are a rigorous assistant answering questions.\n", - "You only answer based on the current information available.\n", - "The current information available is:\n", - "\n", - "```\n", - "{CURRENT_INFO}\n", - "```\n", - "\n", - "The answer must be in one of the following formats:\n", - "- YES/NO (for boolean questions)\n", - "Is the model an LLM?\n", - "YES\n", - "- Number (for numeric questions)\n", - "How many layers does the model have?\n", - "12\n", - "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model? -A: ReLU -B: Sigmoid -C: Tanh\n", - "C\n", - "\"\"\"\n", - "\n", - "\n", - "def process_document(\n", - " document_file,\n", - " document_data,\n", - " model,\n", - "):\n", - " logger.info(\"Setting up RAG\")\n", - " RAG = RAGPretrainedModel.from_pretrained(\"colbert-ir/colbertv2.0\")\n", - " corpus_processor = CorpusProcessor()\n", - " documents = corpus_processor.process_corpus([load_pdf(document_file)])\n", - " RAG.encode([x[\"content\"] for x in documents])\n", - "\n", - " logger.info(\"Predicting\")\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in document_data.iterrows():\n", - " question = row[\"question\"]\n", - " question_part, *options = question.split(\"?\")\n", - "\n", - " logger.info(f\"Question: {question}\")\n", - " results = RAG.search_encoded_docs(query=question_part, k=3)\n", - " current_info = \"\\n\".join(result[\"content\"] for result in results)\n", - " logger.info(current_info[:100])\n", - "\n", - " messages = [\n", - " {\n", - " \"role\": \"system\",\n", - " \"content\": ANSWER_WITH_TYPE_PROMPT.format(CURRENT_INFO=current_info),\n", - " },\n", - " {\"role\": \"user\", \"content\": question},\n", - " ]\n", - " answer = model.get_response(messages)\n", - " logger.info(answer)\n", - " answers[index] = answer\n", - " sections[index] = None\n", - "\n", - " return 
answers, sections" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jr3ke2aJq3m0" - }, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "zKMHc0Ouq3m0" - }, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_llama_cpp_model" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "cMBl2dxLq3m0" - }, - "outputs": [], - "source": [ - "model = load_llama_cpp_model(\n", - " \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j5jWlVBaq3m1" - }, - "source": [ - "# Run Benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000, - "referenced_widgets": [ - "20d67e8902244d87ad72120b9fb71284", - "1e7fcaa0156d4f09af4bf24a8607e787", - "0bad96f6403c4042a9ed7bb491c1b25d", - "1c9e0ff5abab4e378959f47c5655f9f7", - "dd0ddf2594eb42b4babe6eeaf6a59bbb", - "5e532f20ae6d4a5c90d5beba1518d3ee", - "50215024305b41c38aec0a3808b3bc84", - "a42220b511f14cd8b89f5071c0d216a4", - "46097609bd4b46fa94c27a5dcfe98a1a", - "e3084de2764a48089029ffafa1087e8a", - "420798f709e2420d81d7223c34ca442e", - "8209dde69d4147739c522342bfedcccd", - "066c98c9848e4e00b68d0e98ec6f3c1f", - "c88515f583bd469ca0d6ca54a812ca14", - "a47e31ce610b4dcf8ac934ec11aefc65", - "38bd9b6cec8f42f1a9b2caca71478f4b", - "c8939bbe84c24ff8ad43c8d996d29af2", - "9a8317a8c8754d4d8b513a7fb0366c8d", - "ea0ed18363ec4a86b0383e0b43d38ac7", - "dfb0d5f3c9ae46dc910d335a9215521a", - "a2a6f8043e9943c7a6ec9112ac3d33bd", - "8d18d3f17569471fade4a2df380a245c", - "29d523b694174b7596944eeb86a553d0", - "73d7ade0b58b41d1b1ac109026eeabc8", - "cc1c0fcd84b94a199612c3e7ccd906cd", - "5dbe5cc9d6e64e5cb62e7018a42e1f8e", - "56de5716ee0146158e399759aef55c41", - "7f05356467fa4c2ab321004efa06e9c9", - "ed08c56e20194dbca6732642fb4af466", - 
"266e8497e8b04e3fad5d23391960ed13", - "0bebf69871bb4d04a5329ecb32d64b06", - "be5d5dcca8cb498d8eb982b1cc1273fc", - "f6e97fcb881443beaec839bd64530d2d", - "d1b166882cef441c816a75b784b3dcb0", - "5f145f7ffcd540149cd775f01e3da418", - "6766b3d159fd4c29b853f3ad44616429", - "e3ec24ca9f384b6e8a6b25f66c9a2872", - "5b034562b2354e70a27bc06f5fe674cd", - "f76cfc2d293d4b409e4fc8bfa805af96", - "167d14dc1f3b42fe9f4d9cc2ec341363", - "3919381f1ae247219c7e4378a5d2e1ff", - "2a59d91e7621422ebda4fefca0ee6760", - "d44706bfc8494edc8f266d3a94ff16a2", - "13dd434100e747588f8be140f55305a3", - "81a8270d87ef4c9b80c46c5236c8292f", - "7d4aa0529fb74e81a08cc12aeb243456", - "67f985db0d7b41f7b15f135d6acb039e", - "6da62a5ad31940329f00748ad6eab4da", - "27ec9d176d11451bb049b62c278a86ff", - "4e15263fae0140299c6a55ce95f7bd43", - "cacdb3a3a0e04ca3b744fb82a3dcc925", - "97d1348ebec44687ac2a9151d52b1e8f", - "ae77818599cd4bc2ac761865e81c3f15", - "44b4d7daccdb46f19db7675c3a7d4f49", - "ee9dca0e0f2c49a49fb50b623818cda9", - "3bd3d79c0262467296061f64606e57ce", - "024598891b4f46299dc20b5cfd714e0c", - "9846ac95a9864f6aad40bffcd1595c48", - "f7e1a279ca7a4576a67d600c6e0fcad6", - "1eda4198a078469dbba236c3ed8654c3", - "45ce30572c20425691ebdabe0696b0ec", - "667241a7a4e6442b9e32450dbcbb0f56", - "11c36278698f4a6e8f606811eaff2166", - "c8a050cfb1164c1cbecb0a86bc555d9c", - "004ad74940344b6eb376ae4cfc85f26b", - "cb69dbb882694ed3bab1a2b35e0df524", - "54af3da7793c404fa8b4e1062185ea68", - "24ae74e4073749fba785b660dac48f4c", - "895f37ac364f4c1aa4b3089fa286fca3", - "f63e1751a94246888bf0426a2288cb36", - "90076a55ec674636b93c7b1d741ea374", - "944a78e6adaf4e3a87551d0bd5a6fc75", - "8d7d0da8d2344625aeef3d1c452a9c68", - "747558448b5e40038b270a6a6f6af6f0", - "81ee5fe4f8044ab9819b9f767c41826e", - "4f1165cdc7ef4701889d0e6de6ac9ed1", - "1601603b8da04598b2a3b1b6532b9de9" - ] - }, - "id": "W9r17Rz3q3m1", - "outputId": "e4618fa8-bdc7-4d27-938e-2a9fb69643f0" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"\u001b[32m2025-01-29 09:01:52.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-29 09:01:53.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:01:53.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf to HAI_AI-Index-Report-2024.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:01:53.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "20d67e8902244d87ad72120b9fb71284", - "version_major": 2, - "version_minor": 0 + "cell_type": "markdown", + "metadata": { + "id": "Fcx4osZYq3mt" }, - "text/plain": [ - "artifact.metadata: 0%| | 0.00/1.63k [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-01-29 09:04:06.458\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:04:06.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 56 documents...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:05:32.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:05:32.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 137 documents...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-01-29 09:05:49.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:05:49.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 199 documents...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/7 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-01-29 09:07:11.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:07:11.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 44 documents...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:08:24.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:08:24.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 143 documents...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:08:37.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:08:37.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 364 documents...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/12 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:09:53.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:09:53.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 1803 documents...\n" - ] + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "wLom5F1VEjYi", + "outputId": "9fcfc092-1856-4fac-9bda-382abdf8be6c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ], + "source": [ + "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "\r 0%| | 0/57 [00:00=1.7.4 (from ragatouille)\n", + " Downloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)\n", + "Collecting fast-pytorch-kmeans==0.2.0.1 (from ragatouille)\n", + " Downloading fast_pytorch_kmeans-0.2.0.1-py3-none-any.whl.metadata (1.1 kB)\n", + "Requirement already satisfied: langchain>=0.1.0 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.15)\n", + "Requirement already satisfied: langchain_core>=0.1.4 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.31)\n", + "Collecting llama-index>=0.7 (from ragatouille)\n", + " Downloading llama_index-0.12.14-py3-none-any.whl.metadata (12 kB)\n", + "Collecting onnx<2.0.0,>=1.15.0 (from ragatouille)\n", + " Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)\n", + "Collecting 
sentence-transformers<3.0.0,>=2.2.2 (from ragatouille)\n", + " Downloading sentence_transformers-2.7.0-py3-none-any.whl.metadata (11 kB)\n", + "Collecting srsly==2.4.8 (from ragatouille)\n", + " Downloading srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n", + "Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (2.5.1+cu124)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.36.2 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (4.47.1)\n", + "Collecting voyager<3.0.0,>=2.0.2 (from ragatouille)\n", + " Downloading voyager-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.9 kB)\n", + "Collecting bitarray (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading bitarray-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (32 kB)\n", + "Collecting datasets (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: flask in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (3.1.0)\n", + "Collecting git-python (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading git_python-1.0.3-py2.py3-none-any.whl.metadata (331 bytes)\n", + "Collecting python-dotenv (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", + "Collecting ninja (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.3 kB)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (1.13.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (4.67.1)\n", + "Collecting ujson (from colbert-ai==0.2.19->ragatouille)\n", + " Downloading 
ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from fast-pytorch-kmeans==0.2.0.1->ragatouille) (1.26.4)\n", + "Collecting pynvml (from fast-pytorch-kmeans==0.2.0.1->ragatouille)\n", + " Downloading pynvml-12.0.0-py3-none-any.whl.metadata (5.4 kB)\n", + "Requirement already satisfied: catalogue<2.1.0,>=2.0.3 in /usr/local/lib/python3.11/dist-packages (from srsly==2.4.8->ragatouille) (2.0.10)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from faiss-cpu<2.0.0,>=1.7.4->ragatouille) (24.2)\n", + "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (6.0.2)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.0.37)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (3.11.11)\n", + "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.3.5)\n", + "Requirement already satisfied: langsmith<0.4,>=0.1.17 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.3.1)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.10.6)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.32.3)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (9.0.0)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from 
langchain_core>=0.1.4->ragatouille) (1.33)\n", + "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.11/dist-packages (from langchain_core>=0.1.4->ragatouille) (4.12.2)\n", + "Collecting llama-index-agent-openai<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_agent_openai-0.4.3-py3-none-any.whl.metadata (727 bytes)\n", + "Collecting llama-index-cli<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_cli-0.4.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Collecting llama-index-core<0.13.0,>=0.12.14 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_core-0.12.14-py3-none-any.whl.metadata (2.5 kB)\n", + "Collecting llama-index-embeddings-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl.metadata (684 bytes)\n", + "Collecting llama-index-indices-managed-llama-cloud>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl.metadata (3.6 kB)\n", + "Collecting llama-index-llms-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_llms_openai-0.3.14-py3-none-any.whl.metadata (3.3 kB)\n", + "Collecting llama-index-multi-modal-llms-openai<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_multi_modal_llms_openai-0.4.2-py3-none-any.whl.metadata (726 bytes)\n", + "Collecting llama-index-program-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_program_openai-0.3.1-py3-none-any.whl.metadata (764 bytes)\n", + "Collecting llama-index-question-gen-openai<0.4.0,>=0.3.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_question_gen_openai-0.3.0-py3-none-any.whl.metadata (783 bytes)\n", + "Collecting llama-index-readers-file<0.5.0,>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading 
llama_index_readers_file-0.4.4-py3-none-any.whl.metadata (5.4 kB)\n", + "Collecting llama-index-readers-llama-parse>=0.4.0 (from llama-index>=0.7->ragatouille)\n", + " Downloading llama_index_readers_llama_parse-0.4.0-py3-none-any.whl.metadata (3.6 kB)\n", + "Requirement already satisfied: nltk>3.8.1 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (3.9.1)\n", + "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.11/dist-packages (from onnx<2.0.0,>=1.15.0->ragatouille) (4.25.6)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (1.6.1)\n", + "Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (0.27.1)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (11.1.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.17.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.5)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2024.10.0)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13->ragatouille)\n", + " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13->ragatouille)\n", + " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from 
torch>=1.13->ragatouille)\n", + " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.13->ragatouille)\n", + " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.13->ragatouille)\n", + " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.13->ragatouille)\n", + " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=1.13->ragatouille)\n", + " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=1.13->ragatouille)\n", + " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch>=1.13->ragatouille)\n", + " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.4.127)\n", + "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch>=1.13->ragatouille)\n", + " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (1.13.1)\n", + "Requirement already 
satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=1.13->ragatouille) (1.3.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (2024.11.6)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.21.0)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.5.2)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (25.1.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.18.3)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain_core>=0.1.4->ragatouille) (3.0.0)\n", + "Requirement already satisfied: 
httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.28.1)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (3.10.15)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.0)\n", + "Requirement already satisfied: zstandard<0.24.0,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.23.0)\n", + "Requirement already satisfied: openai>=1.14.0 in /usr/local/lib/python3.11/dist-packages (from llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.59.9)\n", + "Collecting dataclasses-json (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n", + "Requirement already satisfied: deprecated>=1.2.9.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille) (1.2.18)\n", + "Collecting dirtyjson<2.0.0,>=1.0.8 (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading dirtyjson-1.0.8-py3-none-any.whl.metadata (11 kB)\n", + "Collecting filetype<2.0.0,>=1.2.0 (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)\n", + "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille) (1.6.0)\n", + "Collecting tiktoken>=0.3.3 (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", + 
"Collecting typing-inspect>=0.8.0 (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille) (1.17.2)\n", + "Collecting llama-cloud<0.2.0,>=0.1.8 (from llama-index-indices-managed-llama-cloud>=0.4.0->llama-index>=0.7->ragatouille)\n", + " Downloading llama_cloud-0.1.11-py3-none-any.whl.metadata (912 bytes)\n", + "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (4.12.3)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.2.2)\n", + "Collecting pypdf<6.0.0,>=5.1.0 (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille)\n", + " Downloading pypdf-5.2.0-py3-none-any.whl.metadata (7.2 kB)\n", + "Collecting striprtf<0.0.27,>=0.0.26 (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille)\n", + " Downloading striprtf-0.0.26-py3-none-any.whl.metadata (2.1 kB)\n", + "Collecting llama-parse>=0.5.0 (from llama-index-readers-llama-parse>=0.4.0->llama-index>=0.7->ragatouille)\n", + " Downloading llama_parse-0.5.20-py3-none-any.whl.metadata (6.9 kB)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (8.1.8)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (1.4.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (0.7.0)\n", + "Requirement already satisfied: 
pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (2.27.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2024.12.14)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from SQLAlchemy<3,>=1.4->langchain>=0.1.0->ragatouille) (3.1.1)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets->colbert-ai==0.2.19->ragatouille) (17.0.0)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets->colbert-ai==0.2.19->ragatouille)\n", + " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", + "Collecting xxhash (from datasets->colbert-ai==0.2.19->ragatouille)\n", + " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", + "Collecting multiprocess<0.70.17 (from datasets->colbert-ai==0.2.19->ragatouille)\n", + " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", + "Collecting fsspec (from torch>=1.13->ragatouille)\n", + " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: Werkzeug>=3.1 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (3.1.3)\n", + "Requirement already satisfied: itsdangerous>=2.2 in /usr/local/lib/python3.11/dist-packages (from 
flask->colbert-ai==0.2.19->ragatouille) (2.2.0)\n", + "Requirement already satisfied: blinker>=1.9 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (1.9.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=1.13->ragatouille) (3.0.2)\n", + "Requirement already satisfied: gitpython in /usr/local/lib/python3.11/dist-packages (from git-python->colbert-ai==0.2.19->ragatouille) (3.1.44)\n", + "Collecting nvidia-ml-py<13.0.0a0,>=12.0.0 (from pynvml->fast-pytorch-kmeans==0.2.0.1->ragatouille)\n", + " Downloading nvidia_ml_py-12.570.86-py3-none-any.whl.metadata (8.7 kB)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->sentence-transformers<3.0.0,>=2.2.2->ragatouille) (3.5.0)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.11/dist-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.6)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (3.7.1)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.14.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.9.0)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from 
openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (0.8.2)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.3.1)\n", + "Collecting mypy-extensions>=0.3.0 (from typing-inspect>=0.8.0->llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)\n", + "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->llama-index-core<0.13.0,>=0.12.14->llama-index>=0.7->ragatouille)\n", + " Downloading marshmallow-3.26.0-py3-none-any.whl.metadata (7.3 kB)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython->git-python->colbert-ai==0.2.19->ragatouille) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2025.1)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython->git-python->colbert-ai==0.2.19->ragatouille) (5.0.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.17.0)\n", + "Downloading ragatouille-0.0.8.post4-py3-none-any.whl (41 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fast_pytorch_kmeans-0.2.0.1-py3-none-any.whl (8.8 kB)\n", + "Downloading srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (490 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m490.9/490.9 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pypdf2-3.0.1-py3-none-any.whl (232 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m232.6/232.6 kB\u001b[0m \u001b[31m24.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.5/27.5 MB\u001b[0m \u001b[31m77.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_index-0.12.14-py3-none-any.whl (6.9 kB)\n", + "Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.0/16.0 MB\u001b[0m \u001b[31m109.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m109.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m89.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m55.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m42.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading voyager-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m108.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_index_agent_openai-0.4.3-py3-none-any.whl (13 kB)\n", + "Downloading llama_index_cli-0.4.0-py3-none-any.whl (27 kB)\n", + "Downloading llama_index_core-0.12.14-py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m60.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl (6.2 kB)\n", + "Downloading llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl (13 kB)\n", + "Downloading llama_index_llms_openai-0.3.14-py3-none-any.whl (14 kB)\n", + "Downloading llama_index_multi_modal_llms_openai-0.4.2-py3-none-any.whl (5.9 kB)\n", + "Downloading llama_index_program_openai-0.3.1-py3-none-any.whl (5.3 kB)\n", + "Downloading llama_index_question_gen_openai-0.3.0-py3-none-any.whl (2.9 kB)\n", + "Downloading llama_index_readers_file-0.4.4-py3-none-any.whl (39 kB)\n", + "Downloading llama_index_readers_llama_parse-0.4.0-py3-none-any.whl (2.5 kB)\n", + "Downloading bitarray-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (286 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m286.1/286.1 kB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m40.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading git_python-1.0.3-py2.py3-none-any.whl (1.9 kB)\n", + "Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (422 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m422.9/422.9 kB\u001b[0m \u001b[31m34.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pynvml-12.0.0-py3-none-any.whl (26 kB)\n", + "Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", + "Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dirtyjson-1.0.8-py3-none-any.whl (25 kB)\n", + "Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n", + "Downloading llama_cloud-0.1.11-py3-none-any.whl (250 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m250.6/250.6 kB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading llama_parse-0.5.20-py3-none-any.whl (16 kB)\n", + "Downloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_ml_py-12.570.86-py3-none-any.whl (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.4/44.4 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pypdf-5.2.0-py3-none-any.whl (298 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.7/298.7 kB\u001b[0m \u001b[31m30.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading striprtf-0.0.26-py3-none-any.whl (6.9 kB)\n", + "Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m66.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n", + "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading marshmallow-3.26.0-py3-none-any.whl (50 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Building wheels for collected packages: colbert-ai\n", + " Building wheel for colbert-ai (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for colbert-ai: filename=colbert_ai-0.2.19-py3-none-any.whl size=114759 sha256=c5fc763cea6b0f0dde2560deb67532087429f940da047fc0bb4516a869abbe3b\n", + " Stored in directory: /root/.cache/pip/wheels/14/75/5f/9680ae93eb0258ccf3e9d8cd34f328c53f8888c06c37067f3a\n", + "Successfully built colbert-ai\n", + "Installing collected packages: striprtf, nvidia-ml-py, filetype, dirtyjson, bitarray, xxhash, voyager, ujson, srsly, python-dotenv, PyPDF2, pypdf, pynvml, onnx, nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, ninja, mypy-extensions, marshmallow, fsspec, faiss-cpu, dill, typing-inspect, tiktoken, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, nvidia-cusolver-cu12, llama-cloud, git-python, dataclasses-json, llama-index-core, datasets, sentence-transformers, llama-parse, llama-index-readers-file, llama-index-llms-openai, llama-index-indices-managed-llama-cloud, llama-index-embeddings-openai, fast-pytorch-kmeans, colbert-ai, llama-index-readers-llama-parse, llama-index-multi-modal-llms-openai, llama-index-cli, llama-index-agent-openai, llama-index-program-openai, llama-index-question-gen-openai, llama-index, ragatouille\n", + " Attempting uninstall: srsly\n", + " Found existing installation: srsly 2.5.1\n", + " Uninstalling srsly-2.5.1:\n", + " Successfully uninstalled srsly-2.5.1\n", + " Attempting uninstall: nvidia-nvjitlink-cu12\n", + " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", + " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", + " Attempting uninstall: nvidia-curand-cu12\n", + " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", + " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", + " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", + " Attempting uninstall: nvidia-cufft-cu12\n", + " Found existing 
installation: nvidia-cufft-cu12 11.2.3.61\n", + " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", + " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", + " Attempting uninstall: nvidia-cuda-runtime-cu12\n", + " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", + " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-cupti-cu12\n", + " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cublas-cu12\n", + " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", + " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", + " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2024.10.0\n", + " Uninstalling fsspec-2024.10.0:\n", + " Successfully uninstalled fsspec-2024.10.0\n", + " Attempting uninstall: nvidia-cusparse-cu12\n", + " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", + " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", + " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", + " Attempting uninstall: nvidia-cudnn-cu12\n", + " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", + " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", + " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", + " Attempting uninstall: nvidia-cusolver-cu12\n", + " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", + " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", + " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", + " Attempting uninstall: 
sentence-transformers\n", + " Found existing installation: sentence-transformers 3.3.1\n", + " Uninstalling sentence-transformers-3.3.1:\n", + " Successfully uninstalled sentence-transformers-3.3.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed PyPDF2-3.0.1 bitarray-3.0.0 colbert-ai-0.2.19 dataclasses-json-0.6.7 datasets-3.2.0 dill-0.3.8 dirtyjson-1.0.8 faiss-cpu-1.9.0.post1 fast-pytorch-kmeans-0.2.0.1 filetype-1.2.0 fsspec-2024.9.0 git-python-1.0.3 llama-cloud-0.1.11 llama-index-0.12.14 llama-index-agent-openai-0.4.3 llama-index-cli-0.4.0 llama-index-core-0.12.14 llama-index-embeddings-openai-0.3.1 llama-index-indices-managed-llama-cloud-0.6.4 llama-index-llms-openai-0.3.14 llama-index-multi-modal-llms-openai-0.4.2 llama-index-program-openai-0.3.1 llama-index-question-gen-openai-0.3.0 llama-index-readers-file-0.4.4 llama-index-readers-llama-parse-0.4.0 llama-parse-0.5.20 marshmallow-3.26.0 multiprocess-0.70.16 mypy-extensions-1.0.0 ninja-1.11.1.3 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-ml-py-12.570.86 nvidia-nvjitlink-cu12-12.4.127 onnx-1.17.0 pynvml-12.0.0 pypdf-5.2.0 python-dotenv-1.0.1 ragatouille-0.0.8.post4 sentence-transformers-2.7.0 srsly-2.4.8 striprtf-0.0.26 tiktoken-0.8.0 typing-inspect-0.9.0 ujson-5.10.0 voyager-2.1.0 xxhash-3.5.0\n" + ] + } + ], + "source": [ + "%pip install ragatouille PyPDF2" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shapes:\n", - "encodings: torch.Size([1803, 508, 
128])\n", - "doc_masks: torch.Size([1803, 508])\n", - "Documents encoded!\n" - ] + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "I0dl5xGnq3my", + "outputId": "3462f240-5e8c-446b-d98b-3ce8624b1e43" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-b37yz5_5\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-b37yz5_5\n", + " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", + " Switched to a new branch '5-add-benchmark'\n", + " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 036f8a399a9eb21539ea42a1f8ced5db11e20433\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting fire (from structured-qa==0.3.3.dev87+g036f8a3)\n", + " Downloading fire-0.7.0.tar.gz (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev87+g036f8a3) (0.27.1)\n", + "Collecting loguru (from structured-qa==0.3.3.dev87+g036f8a3)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev87+g036f8a3) (2.10.6)\n", + "Collecting pymupdf4llm (from structured-qa==0.3.3.dev87+g036f8a3)\n", + " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev87+g036f8a3) (6.0.2)\n", + "Collecting rapidfuzz (from structured-qa==0.3.3.dev87+g036f8a3)\n", + " Downloading rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", + "Collecting streamlit (from structured-qa==0.3.3.dev87+g036f8a3)\n", + " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev87+g036f8a3) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev87+g036f8a3) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev87+g036f8a3) (2024.9.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev87+g036f8a3) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev87+g036f8a3) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev87+g036f8a3) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev87+g036f8a3) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev87+g036f8a3) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev87+g036f8a3) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev87+g036f8a3)\n", + " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev87+g036f8a3) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev87+g036f8a3)\n", + " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev87+g036f8a3)\n", + " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev87+g036f8a3) (6.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from 
altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (1.24.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev87+g036f8a3) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev87+g036f8a3) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev87+g036f8a3) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev87+g036f8a3) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from 
gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (25.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev87+g036f8a3) (1.17.0)\n", + "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m370.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", + "Downloading rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m 
\u001b[31m60.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m121.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m119.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m98.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", + " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev87+g036f8a3-py3-none-any.whl size=16324 sha256=44779f73ce625b12412370bf3cb6b9afea29e0750f6a706b25e570b1814d0a65\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-04wbzr5n/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", + " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=0bc0dd1a95d2f10b061abc3634edaae132a1e1550dde89ee6cbabec5470a1267\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + "Successfully built structured-qa fire\n", + "Installing collected packages: watchdog, rapidfuzz, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", + "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 rapidfuzz-3.11.0 streamlit-1.41.1 structured-qa-0.3.3.dev87+g036f8a3 watchdog-6.0.0\n" + ] + } + ], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-01-29 09:10:22.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": 1024\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:10:22.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-01-29 09:10:22.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mThis provides a natural way\n", - "to invoke computation across the elements in a domain such as a vector, \u001b[0m\n", - "\u001b[32m2025-01-29 09:10:23.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - "\"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:10:23.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: 
In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-01-29 09:10:23.860\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mThis section gives an overview of nvcc workflow and command\n", - "options. A complete description can be f\u001b[0m\n", - "\u001b[32m2025-01-29 09:10:25.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:10:25.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-01-29 09:10:25.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mThe modified host code is output either as C++ code that is left to be compiled using another tool o\u001b[0m\n", - "\u001b[32m2025-01-29 09:10:26.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:10:26.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? 
-A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-01-29 09:10:26.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mThis environment vari-\n", - "able can be used to validate\n", - "that PTX code is embedded in\n", - "an application and \u001b[0m\n", - "\u001b[32m2025-01-29 09:10:28.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:10:28.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:28.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. 
-C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:28.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mThe modified host code is output either as C++ code that is left to be compiled using another tool o\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:31.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:31.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:31.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mWhile these virtual addresses\n", - "are fixed for the lifetime of the allocation node, the allocation cont\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:32.563\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:32.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:32.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mGraph memory nodes are\n", - "only supported on driver versions 11.4 and newer.\n", - "379\n", - "CUDA C++ Programming Gu\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:34.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:34.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:34.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mWhile these virtual addresses\n", - "are fixed for the lifetime of the allocation node, the allocation cont\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:35.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:35.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:35.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA program must guarantee that operations accessing graph memory:\n", - "▶are ordered after the allocation n\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:43.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - "\"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:43.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:43.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mLazy Loading\n", - "23.1. 
What is Lazy Loading?\n", - "Lazy Loading delays loading of CUDA modules and kernels fro\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:44.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:44.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:44.849\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mLazy Loading\n", - "23.1. What is Lazy Loading?\n", - "Lazy Loading delays loading of CUDA modules and kernels fro\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:46.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:46.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:47.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689 to ?uri=OJ:L_202401689.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:11:47.506\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Nl_haxghq3mz", + "outputId": "a43ff078-2a34-4bf3-e8b3-0388b27d281a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2025-01-30 12:00:24-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 21734 (21K) [text/plain]\n", + "Saving to: ‘structured_qa.csv’\n", + "\n", + "structured_qa.csv 100%[===================>] 21.22K --.-KB/s in 0s \n", + "\n", + "2025-01-30 12:00:25 (52.0 MB/s) - ‘structured_qa.csv’ saved [21734/21734]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 754 documents...\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "ZdWx_e7iq3mz" + }, + "source": [ + "# Setup" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/24 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:13:14.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", - 
"\u001b[32m2025-01-29 09:13:14.053\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "BiUeBWnIq3mz" + }, + "outputs": [], + "source": [ + "import PyPDF2\n", + "\n", + "\n", + "def load_pdf(pdf_file: str) -> str | None:\n", + " try:\n", + " pdf_reader = PyPDF2.PdfReader(pdf_file)\n", + " return \"\\n\".join(page.extract_text() for page in pdf_reader.pages)\n", + " except Exception as e:\n", + " logger.exception(e)\n", + " return None" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 17 documents...\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "z0B2yhFISDgG" + }, + "source": [ + "## Function to Process all questions for a single Document" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/1 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:15:41.467\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 09:15:41.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is 
deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "zKMHc0Ouq3m0" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_llama_cpp_model" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Encoding 48 documents...\n" - ] + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "cMBl2dxLq3m0", + "outputId": "46e74b2c-4aa3-4d8f-9fe1-b04fd97609a0", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153, + "referenced_widgets": [ + "85021e2e71eb4797b12938e40dd520ca", + "034aa03bfb4a40b995abac507b730fb0", + "ed0530d56c834c3396a29f6ace0d0002", + "9ac90a1242fd4538a0ec300ad6c8580d", + "480005206d7a4d8ba5345d1fb7176d21", + "0ce54b97711e42b6a543c98df81bf183", + "fcc1a1157c964fd3952f2ceeb75380fb", + "ced7a587317f4befbe2b163b2c14b446", + "69a48a84064f47d8bf5f4d111dda9400", + "7f643e8e8bce41609b418af53b374174", + "b6bd4737fa764ae2aa3112d76d78d699" + ] + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Qwen2.5-7B-Instruct-Q8_0.gguf: 0%| | 0.00/8.10G [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-30 12:16:10.685\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:16:10.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-30 12:16:10.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 1214 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/38 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:09.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mFile 1706.03762.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:09.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 56 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:19.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mFile 2106.09685.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:19.506\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 137 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:33.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mFile 2201.11903.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:33.232\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 199 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/7 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:43.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mFile 2210.05189.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:43.933\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 44 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:49.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mFile Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:49.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: 
`torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 143 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:56.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mFile 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-30 12:17:56.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 364 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/12 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:18:09.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mFile CUDA_C_Programming_Guide.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-30 12:18:09.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 1803 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/57 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", + "\u001b[32m2025-01-30 12:18:46.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mFile ?uri=OJ:L_202401689.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-30 12:18:46.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 754 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/24 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:03.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mFile 7DUME_EN01_Rules.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:03.796\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 17 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/1 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:19.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mFile is_eotn_rulebook.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:19.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Encoding 48 documents...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/2 [00:00Discard any remaining, face-up Island cards and reveal new ones.\n", + " >Pass the First player marker to \u001b[0m\n", + "\u001b[32m2025-01-30 12:19:24.553\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1m25\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:24.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:24.573\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m>There is no limit to the number, type, or order of \n", + "actions a player may take during the Action pha\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:25.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:25.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:25.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mRations are needed for the long journey!\n", + "A player can choose to Pillage a selected Island card with\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:26.213\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:26.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:26.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mEach \n", + "action draws the clans closer to becoming the greatest empire! The \n", + "game ends in the same roun\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:27.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1m1\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:27.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:27.078\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mGAME FLOW\n", + "Note for Imperial Settlers fans \n", + "You cannot Spend 2 Workers \n", + "to get a Resource or a card.\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:27.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:27.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:27.930\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mIMPORT ANT: Some Field Locations work only as upgrades. These Fields have \n", + "the Resources on the righ\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:28.786\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1m1\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:28.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:28.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mNOTE 2: Some abilities in the \n", + "game have a ‘/’ divider between \n", + "presented choices. This should be \n", + "t\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:29.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:29.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:29.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mThus allowing a player to play \n", + "a single Boost card or build a single Field Location before resolvin\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:30.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:30.489\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:30.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", + "(Gold tokens assigned to cards are\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:31.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:31.339\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:31.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", + "(Gold tokens assigned to cards are\u001b[0m\n", + "\u001b[32m2025-01-30 12:19:32.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mA\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = 
Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-01-29 09:15:44.189\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": 4\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:15:44.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-01-29 09:15:44.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNOTE 1: There’s no limit to the number of cards a player may have \n", - "in their hand. 
\n", - "NOTE 2: If the\u001b[0m\n", - "\u001b[32m2025-01-29 09:15:50.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - "\"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:15:50.716\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-01-29 09:15:50.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mStarting with the First player and continuing clockwise, \n", - "each player performs one action at a time.\u001b[0m\n", - "\u001b[32m2025-01-29 09:15:52.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:15:52.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-01-29 09:15:52.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m>Discard any remaining, face-up Island cards and reveal new ones.\n", - " >Pass the First player marker to \u001b[0m\n", - "\u001b[32m2025-01-29 09:15:53.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": 25\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:15:53.813\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-29 09:16:53.816\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-01-29 09:16:53.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m>There is no limit to the number, type, or order of \n", - "actions a player may take during the Action pha\u001b[0m\n", - "\u001b[32m2025-01-29 09:16:55.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:16:55.327\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-01-29 09:16:55.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mRations are needed for the long journey!\n", - "A player can choose to Pillage a selected Island card with\u001b[0m\n", - "\u001b[32m2025-01-29 09:16:57.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - "\"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:16:57.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-01-29 09:16:57.273\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mEach \n", - "action draws the clans closer to becoming the greatest empire! The \n", - "game ends in the same roun\u001b[0m\n", - "\u001b[32m2025-01-29 09:16:58.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"The document says that players gain VPs from Pillaging Islands but not how many victory points each conquered island gives\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:16:58.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-01-29 09:16:58.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mGAME FLOW\n", - "Note for Imperial Settlers fans \n", - "You cannot Spend 2 Workers \n", - "to get a Resource or a card.\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:00.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:00.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:00.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mIMPORT ANT: Some Field Locations work only as upgrades. 
These Fields have \n", - "the Resources on the righ\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:01.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"1\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:01.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:01.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNOTE 2: Some abilities in the \n", - "game have a ‘/’ divider between \n", - "presented choices. This should be \n", - "t\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:04.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - "\"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:04.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:04.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mThus allowing a player to play \n", - "a single Boost card or build a single Field Location before resolvin\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:07.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:07.537\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:07.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", - "(Gold tokens assigned to cards are\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:09.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:09.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:09.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", - "(Gold tokens assigned to cards are\u001b[0m\n", - "\u001b[32m2025-01-29 09:17:12.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "for document_link, document_data in data.groupby(\"document\"):\n", - " logger.info(f\"Downloading document {document_link}\")\n", 
- " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " if not Path(downloaded_document).exists():\n", - " urlretrieve(document_link, downloaded_document)\n", - " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", - " else:\n", - " logger.info(f\"File {downloaded_document} already exists\")\n", - "\n", - " answers, sections = process_document(downloaded_document, document_data, model)\n", - "\n", - " for index in document_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 269 + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "mltqL7Bhq3m1", + "outputId": "5c965422-1c25-4305-ea9d-62df9ebc4cfc" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "28 28 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "44 44 https://arxiv.org/pdf/2201.11903 \n", + "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", + "51 51 https://github.com/mozilla-ai/structured-qa/re... \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "53 53 https://github.com/mozilla-ai/structured-qa/re... \n", + "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", + "65 65 https://github.com/mozilla-ai/structured-qa/re... \n", + "\n", + " section \\\n", + "10 5.4 Regularization \n", + "22 Classification of general-purpose AI models as... 
\n", + "28 Compliant AI systems which present a risk \n", + "44 3.2 Results \n", + "47 CARD AND TILE EFFECTS \n", + "51 CHAPTER OVERVIEW \n", + "52 CARD AND TILE COSTS \n", + "53 CARD AND TILE COSTS \n", + "55 CARD AND TILE EFFECTS \n", + "65 EXPEDITION PHASE \n", + "\n", + " question answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 \n", + "22 What is the threshold, measured in floating po... C \n", + "28 What is the time period for a market surveilla... C \n", + "44 How many random samples were examined to under... 100 \n", + "47 How many different races are there? 6 \n", + "51 After taking a landmark tile, do you reveal a ... NO \n", + "52 Can a player pay coins to compensate for missi... YES \n", + "53 If a player is missing 2 skill symbols, how ma... 2 \n", + "55 Which type of cards provide coins? -A: Gray -B... B \n", + "65 Do you need a fish to conquer a distant island? YES \n", + "\n", + " pred_answer pred_section \n", + "10 PDROP= 0.1 NaN \n", + "22 B NaN \n", + "28 A NaN \n", + "44 50 NaN \n", + "47 NUMBER OF DIFFERENT RACES: 7 NaN \n", + "51 YES NaN \n", + "52 NO NaN \n", + "53 NO\\n- SINGLE LETTER (FOR MULTIPLE-CHOICE QUEST... NaN \n", + "55 A NaN \n", + "65 NO NaN " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.037625.4 RegularizationWhat was the dropout rate used for the base mo...0.1PDROP= 0.1NaN
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Classification of general-purpose AI models as...What is the threshold, measured in floating po...CBNaN
2828https://eur-lex.europa.eu/legal-content/EN/TXT...Compliant AI systems which present a riskWhat is the time period for a market surveilla...CANaN
4444https://arxiv.org/pdf/2201.119033.2 ResultsHow many random samples were examined to under...10050NaN
4747https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?6NUMBER OF DIFFERENT RACES: 7NaN
5151https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
5353https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSIf a player is missing 2 skill symbols, how ma...2NO\\n- SINGLE LETTER (FOR MULTIPLE-CHOICE QUEST...NaN
5555https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSWhich type of cards provide coins? -A: Gray -B...BANaN
6565https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEDo you need a fish to conquer a distant island?YESNONaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"results\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17,\n \"min\": 10,\n \"max\": 65,\n \"num_unique_values\": 10,\n \"samples\": [\n 55,\n 22,\n 51\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"Classification of general-purpose AI models as general-purpose AI models with systemic risk\",\n \"CHAPTER OVERVIEW\",\n \"5.4 Regularization\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\",\n \"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\",\n \"After taking a landmark tile, do you reveal a new tile and the end of your turn?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"C\",\n \"YES\",\n \"0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"B\",\n \"YES\",\n \"PDROP= 0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 21 + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " if result[\"pred_answer\"].startswith((f\"-{result['answer']}\", f\"{result['answer']}\")):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] }, - "id": "mltqL7Bhq3m1", - "outputId": "9fc0b64a-2b6c-4e05-9165-5b6b5bf52508" - }, - "outputs": [ { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 7,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 18,\n \"min\": 22,\n \"max\": 83,\n \"num_unique_values\": 7,\n \"samples\": [\n 22,\n 44,\n 66\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"https://arxiv.org/pdf/2201.11903\",\n 
\"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Classification of general-purpose AI models as general-purpose AI models with systemic risk\",\n \"3.2 Results\",\n \"15.3. API Fundamentals\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\",\n \"How many random samples were examined to understand model performance?\",\n \"How many victory points you get from each conquered island?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"C\",\n \"100\",\n \"1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"NO SPECIFIC THRESHOLD IS MENTIONED IN THE DOCUMENT.\",\n \"50\",\n \"THE DOCUMENT SAYS THAT PLAYERS GAIN VPS FROM PILLAGING ISLANDS BUT NOT HOW MANY VICTORY POINTS EACH CONQUERED ISLAND GIVES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" + "cell_type": "code", + "execution_count": 22, + "metadata": 
{ + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "c4z9XxXWq3m1", + "outputId": "29f0fe17-85ce-4792-f6c5-08a8b8e2c6b3" }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Classification of general-purpose AI models as...What is the threshold, measured in floating po...CNO SPECIFIC THRESHOLD IS MENTIONED IN THE DOCU...NaN
4444https://arxiv.org/pdf/2201.119033.2 ResultsHow many random samples were examined to under...10050NaN
4747https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?67NaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
5555https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSWhich type of cards provide coins? -A: Gray -B...BNONE OF THE ABOVENaN
6666https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEHow many victory points you get from each conq...1THE DOCUMENT SAYS THAT PLAYERS GAIN VPS FROM P...NaN
8383https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...15.3. API FundamentalsWhen are virtual addresses assigned to graph a...CANaN
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.898989898989899" + ] + }, + "metadata": {}, + "execution_count": 22 + } ], - "text/plain": [ - " Unnamed: 0 document \\\n", - "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", - "44 44 https://arxiv.org/pdf/2201.11903 \n", - "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", - "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", - "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", - "66 66 https://github.com/mozilla-ai/structured-qa/re... \n", - "83 83 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "\n", - " section \\\n", - "22 Classification of general-purpose AI models as... \n", - "44 3.2 Results \n", - "47 CARD AND TILE EFFECTS \n", - "52 CARD AND TILE COSTS \n", - "55 CARD AND TILE EFFECTS \n", - "66 EXPEDITION PHASE \n", - "83 15.3. API Fundamentals \n", - "\n", - " question answer \\\n", - "22 What is the threshold, measured in floating po... C \n", - "44 How many random samples were examined to under... 100 \n", - "47 How many different races are there? 6 \n", - "52 Can a player pay coins to compensate for missi... YES \n", - "55 Which type of cards provide coins? -A: Gray -B... B \n", - "66 How many victory points you get from each conq... 1 \n", - "83 When are virtual addresses assigned to graph a... C \n", - "\n", - " pred_answer pred_section \n", - "22 NO SPECIFIC THRESHOLD IS MENTIONED IN THE DOCU... NaN \n", - "44 50 NaN \n", - "47 7 NaN \n", - "52 NO NaN \n", - "55 NONE OF THE ABOVE NaN \n", - "66 THE DOCUMENT SAYS THAT PLAYERS GAIN VPS FROM P... 
NaN \n", - "83 A NaN " + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "c4z9XxXWq3m1", - "outputId": "327b1131-3ed5-40ff-f68d-48b0727873c2" - }, - "outputs": [ { - "data": { - "text/plain": [ - "0.9292929292929293" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UXg_TC7R28QI" + }, + "outputs": [], + "source": [] } - ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "UXg_TC7R28QI" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.12" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "004ad74940344b6eb376ae4cfc85f26b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - 
"grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "024598891b4f46299dc20b5cfd714e0c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_45ce30572c20425691ebdabe0696b0ec", - "placeholder": "​", - "style": "IPY_MODEL_667241a7a4e6442b9e32450dbcbb0f56", - "value": "tokenizer.json: 100%" - } - }, - "066c98c9848e4e00b68d0e98ec6f3c1f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c8939bbe84c24ff8ad43c8d996d29af2", - "placeholder": "​", - "style": "IPY_MODEL_9a8317a8c8754d4d8b513a7fb0366c8d", - "value": "config.json: 100%" - } - 
}, - "0bad96f6403c4042a9ed7bb491c1b25d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a42220b511f14cd8b89f5071c0d216a4", - "max": 1633, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_46097609bd4b46fa94c27a5dcfe98a1a", - "value": 1633 - } - }, - "0bebf69871bb4d04a5329ecb32d64b06": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "11c36278698f4a6e8f606811eaff2166": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - 
"grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "13dd434100e747588f8be140f55305a3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "1601603b8da04598b2a3b1b6532b9de9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "167d14dc1f3b42fe9f4d9cc2ec341363": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "1c9e0ff5abab4e378959f47c5655f9f7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": 
"1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e3084de2764a48089029ffafa1087e8a", - "placeholder": "​", - "style": "IPY_MODEL_420798f709e2420d81d7223c34ca442e", - "value": " 1.63k/1.63k [00:00<00:00, 72.1kB/s]" - } - }, - "1e7fcaa0156d4f09af4bf24a8607e787": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5e532f20ae6d4a5c90d5beba1518d3ee", - "placeholder": "​", - "style": "IPY_MODEL_50215024305b41c38aec0a3808b3bc84", - "value": "artifact.metadata: 100%" - } - }, - "1eda4198a078469dbba236c3ed8654c3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, 
- "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "20d67e8902244d87ad72120b9fb71284": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_1e7fcaa0156d4f09af4bf24a8607e787", - "IPY_MODEL_0bad96f6403c4042a9ed7bb491c1b25d", - "IPY_MODEL_1c9e0ff5abab4e378959f47c5655f9f7" - ], - "layout": "IPY_MODEL_dd0ddf2594eb42b4babe6eeaf6a59bbb" - } - }, - "24ae74e4073749fba785b660dac48f4c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_944a78e6adaf4e3a87551d0bd5a6fc75", - "placeholder": "​", - "style": "IPY_MODEL_8d7d0da8d2344625aeef3d1c452a9c68", - "value": "special_tokens_map.json: 100%" - } - }, - "266e8497e8b04e3fad5d23391960ed13": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { 
- "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "27ec9d176d11451bb049b62c278a86ff": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - 
"margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "29d523b694174b7596944eeb86a553d0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_73d7ade0b58b41d1b1ac109026eeabc8", - "IPY_MODEL_cc1c0fcd84b94a199612c3e7ccd906cd", - "IPY_MODEL_5dbe5cc9d6e64e5cb62e7018a42e1f8e" - ], - "layout": "IPY_MODEL_56de5716ee0146158e399759aef55c41" - } - }, - "2a59d91e7621422ebda4fefca0ee6760": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "38bd9b6cec8f42f1a9b2caca71478f4b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": 
null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3919381f1ae247219c7e4378a5d2e1ff": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"3bd3d79c0262467296061f64606e57ce": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_024598891b4f46299dc20b5cfd714e0c", - "IPY_MODEL_9846ac95a9864f6aad40bffcd1595c48", - "IPY_MODEL_f7e1a279ca7a4576a67d600c6e0fcad6" - ], - "layout": "IPY_MODEL_1eda4198a078469dbba236c3ed8654c3" - } - }, - "420798f709e2420d81d7223c34ca442e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "44b4d7daccdb46f19db7675c3a7d4f49": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - 
"justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "45ce30572c20425691ebdabe0696b0ec": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "46097609bd4b46fa94c27a5dcfe98a1a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "4e15263fae0140299c6a55ce95f7bd43": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4f1165cdc7ef4701889d0e6de6ac9ed1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": 
null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "50215024305b41c38aec0a3808b3bc84": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "54af3da7793c404fa8b4e1062185ea68": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_24ae74e4073749fba785b660dac48f4c", - "IPY_MODEL_895f37ac364f4c1aa4b3089fa286fca3", - "IPY_MODEL_f63e1751a94246888bf0426a2288cb36" - ], - "layout": "IPY_MODEL_90076a55ec674636b93c7b1d741ea374" - } - }, - "56de5716ee0146158e399759aef55c41": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - 
"_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5b034562b2354e70a27bc06f5fe674cd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - 
"max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5dbe5cc9d6e64e5cb62e7018a42e1f8e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_be5d5dcca8cb498d8eb982b1cc1273fc", - "placeholder": "​", - "style": "IPY_MODEL_f6e97fcb881443beaec839bd64530d2d", - "value": " 438M/438M [00:02<00:00, 248MB/s]" - } - }, - "5e532f20ae6d4a5c90d5beba1518d3ee": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": 
null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5f145f7ffcd540149cd775f01e3da418": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f76cfc2d293d4b409e4fc8bfa805af96", - "placeholder": "​", - "style": "IPY_MODEL_167d14dc1f3b42fe9f4d9cc2ec341363", - "value": "tokenizer_config.json: 100%" - } - }, - "667241a7a4e6442b9e32450dbcbb0f56": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6766b3d159fd4c29b853f3ad44616429": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3919381f1ae247219c7e4378a5d2e1ff", - "max": 405, - "min": 0, - "orientation": "horizontal", 
- "style": "IPY_MODEL_2a59d91e7621422ebda4fefca0ee6760", - "value": 405 - } - }, - "67f985db0d7b41f7b15f135d6acb039e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_97d1348ebec44687ac2a9151d52b1e8f", - "max": 231508, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ae77818599cd4bc2ac761865e81c3f15", - "value": 231508 - } - }, - "6da62a5ad31940329f00748ad6eab4da": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_44b4d7daccdb46f19db7675c3a7d4f49", - "placeholder": "​", - "style": "IPY_MODEL_ee9dca0e0f2c49a49fb50b623818cda9", - "value": " 232k/232k [00:00<00:00, 1.77MB/s]" - } - }, - "73d7ade0b58b41d1b1ac109026eeabc8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_7f05356467fa4c2ab321004efa06e9c9", - "placeholder": "​", - "style": "IPY_MODEL_ed08c56e20194dbca6732642fb4af466", - "value": "model.safetensors: 100%" - } - }, - "747558448b5e40038b270a6a6f6af6f0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7d4aa0529fb74e81a08cc12aeb243456": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4e15263fae0140299c6a55ce95f7bd43", - "placeholder": "​", - "style": 
"IPY_MODEL_cacdb3a3a0e04ca3b744fb82a3dcc925", - "value": "vocab.txt: 100%" - } - }, - "7f05356467fa4c2ab321004efa06e9c9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "81a8270d87ef4c9b80c46c5236c8292f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_7d4aa0529fb74e81a08cc12aeb243456", - "IPY_MODEL_67f985db0d7b41f7b15f135d6acb039e", - "IPY_MODEL_6da62a5ad31940329f00748ad6eab4da" - ], - "layout": 
"IPY_MODEL_27ec9d176d11451bb049b62c278a86ff" - } - }, - "81ee5fe4f8044ab9819b9f767c41826e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "8209dde69d4147739c522342bfedcccd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_066c98c9848e4e00b68d0e98ec6f3c1f", - "IPY_MODEL_c88515f583bd469ca0d6ca54a812ca14", - "IPY_MODEL_a47e31ce610b4dcf8ac934ec11aefc65" - ], - "layout": "IPY_MODEL_38bd9b6cec8f42f1a9b2caca71478f4b" - } - }, - "895f37ac364f4c1aa4b3089fa286fca3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_747558448b5e40038b270a6a6f6af6f0", - "max": 112, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_81ee5fe4f8044ab9819b9f767c41826e", - "value": 112 - } - }, - "8d18d3f17569471fade4a2df380a245c": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8d7d0da8d2344625aeef3d1c452a9c68": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "90076a55ec674636b93c7b1d741ea374": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - 
"overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "944a78e6adaf4e3a87551d0bd5a6fc75": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "97d1348ebec44687ac2a9151d52b1e8f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": 
null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9846ac95a9864f6aad40bffcd1595c48": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_11c36278698f4a6e8f606811eaff2166", - "max": 466081, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c8a050cfb1164c1cbecb0a86bc555d9c", - "value": 466081 - } - }, - "9a8317a8c8754d4d8b513a7fb0366c8d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a2a6f8043e9943c7a6ec9112ac3d33bd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - 
"_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a42220b511f14cd8b89f5071c0d216a4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - 
"margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a47e31ce610b4dcf8ac934ec11aefc65": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a2a6f8043e9943c7a6ec9112ac3d33bd", - "placeholder": "​", - "style": "IPY_MODEL_8d18d3f17569471fade4a2df380a245c", - "value": " 743/743 [00:00<00:00, 64.1kB/s]" - } - }, - "ae77818599cd4bc2ac761865e81c3f15": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "be5d5dcca8cb498d8eb982b1cc1273fc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, 
- "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c88515f583bd469ca0d6ca54a812ca14": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ea0ed18363ec4a86b0383e0b43d38ac7", - "max": 743, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_dfb0d5f3c9ae46dc910d335a9215521a", - "value": 743 - } - }, - "c8939bbe84c24ff8ad43c8d996d29af2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - 
"flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c8a050cfb1164c1cbecb0a86bc555d9c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "cacdb3a3a0e04ca3b744fb82a3dcc925": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cb69dbb882694ed3bab1a2b35e0df524": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cc1c0fcd84b94a199612c3e7ccd906cd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_266e8497e8b04e3fad5d23391960ed13", - "max": 438349816, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0bebf69871bb4d04a5329ecb32d64b06", - "value": 438349816 - } - }, - "d1b166882cef441c816a75b784b3dcb0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_5f145f7ffcd540149cd775f01e3da418", - "IPY_MODEL_6766b3d159fd4c29b853f3ad44616429", - "IPY_MODEL_e3ec24ca9f384b6e8a6b25f66c9a2872" - ], - "layout": "IPY_MODEL_5b034562b2354e70a27bc06f5fe674cd" - } - }, - "d44706bfc8494edc8f266d3a94ff16a2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - 
"align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "dd0ddf2594eb42b4babe6eeaf6a59bbb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - 
"top": null, - "visibility": null, - "width": null - } - }, - "dfb0d5f3c9ae46dc910d335a9215521a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "e3084de2764a48089029ffafa1087e8a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e3ec24ca9f384b6e8a6b25f66c9a2872": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d44706bfc8494edc8f266d3a94ff16a2", - "placeholder": "​", - "style": "IPY_MODEL_13dd434100e747588f8be140f55305a3", - "value": " 405/405 [00:00<00:00, 30.5kB/s]" - } - }, - "ea0ed18363ec4a86b0383e0b43d38ac7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ed08c56e20194dbca6732642fb4af466": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ee9dca0e0f2c49a49fb50b623818cda9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "f63e1751a94246888bf0426a2288cb36": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4f1165cdc7ef4701889d0e6de6ac9ed1", - "placeholder": "​", - "style": "IPY_MODEL_1601603b8da04598b2a3b1b6532b9de9", - "value": " 112/112 [00:00<00:00, 8.30kB/s]" - } - }, - "f6e97fcb881443beaec839bd64530d2d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "f76cfc2d293d4b409e4fc8bfa805af96": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": 
"@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f7e1a279ca7a4576a67d600c6e0fcad6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_004ad74940344b6eb376ae4cfc85f26b", - "placeholder": "​", - "style": "IPY_MODEL_cb69dbb882694ed3bab1a2b35e0df524", - "value": " 466k/466k [00:00<00:00, 3.50MB/s]" - } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + }, + "widgets": { + 
"application/vnd.jupyter.widget-state+json": { + "85021e2e71eb4797b12938e40dd520ca": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_034aa03bfb4a40b995abac507b730fb0", + "IPY_MODEL_ed0530d56c834c3396a29f6ace0d0002", + "IPY_MODEL_9ac90a1242fd4538a0ec300ad6c8580d" + ], + "layout": "IPY_MODEL_480005206d7a4d8ba5345d1fb7176d21" + } + }, + "034aa03bfb4a40b995abac507b730fb0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0ce54b97711e42b6a543c98df81bf183", + "placeholder": "​", + "style": "IPY_MODEL_fcc1a1157c964fd3952f2ceeb75380fb", + "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" + } + }, + "ed0530d56c834c3396a29f6ace0d0002": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ced7a587317f4befbe2b163b2c14b446", 
+ "max": 8098525888, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_69a48a84064f47d8bf5f4d111dda9400", + "value": 8098525888 + } + }, + "9ac90a1242fd4538a0ec300ad6c8580d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7f643e8e8bce41609b418af53b374174", + "placeholder": "​", + "style": "IPY_MODEL_b6bd4737fa764ae2aa3112d76d78d699", + "value": " 8.10G/8.10G [03:12<00:00, 41.5MB/s]" + } + }, + "480005206d7a4d8ba5345d1fb7176d21": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + 
"right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0ce54b97711e42b6a543c98df81bf183": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fcc1a1157c964fd3952f2ceeb75380fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ced7a587317f4befbe2b163b2c14b446": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "69a48a84064f47d8bf5f4d111dda9400": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7f643e8e8bce41609b418af53b374174": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + 
"border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b6bd4737fa764ae2aa3112d76d78d699": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } } - } - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/benchmark/qwen_2_5_7B_perfect_context.ipynb b/benchmark/qwen_2_5_7B_perfect_context.ipynb index e0f5d97..f9e633d 100644 --- a/benchmark/qwen_2_5_7B_perfect_context.ipynb +++ b/benchmark/qwen_2_5_7B_perfect_context.ipynb @@ -44,20 +44,20 @@ "base_uri": "https://localhost:8080/" }, "id": "QrgOGtuGlyhT", - "outputId": "340ae5a6-0710-40e8-baba-390c143061cb" + "outputId": "23c329f7-3cea-49af-b969-1bad857de7f1" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Cloning into 'structured-qa'...\n", - "remote: Enumerating objects: 795, done.\u001b[K\n", - "remote: Counting 
objects: 100% (233/233), done.\u001b[K\n", - "remote: Compressing objects: 100% (134/134), done.\u001b[K\n", - "remote: Total 795 (delta 148), reused 126 (delta 92), pack-reused 562 (from 1)\u001b[K\n", - "Receiving objects: 100% (795/795), 2.27 MiB | 5.75 MiB/s, done.\n", - "Resolving deltas: 100% (430/430), done.\n" + "remote: Enumerating objects: 803, done.\u001b[K\n", + "remote: Counting objects: 100% (241/241), done.\u001b[K\n", + "remote: Compressing objects: 100% (137/137), done.\u001b[K\n", + "remote: Total 803 (delta 154), reused 133 (delta 97), pack-reused 562 (from 1)\u001b[K\n", + "Receiving objects: 100% (803/803), 2.29 MiB | 28.93 MiB/s, done.\n", + "Resolving deltas: 100% (436/436), done.\n" ] } ], @@ -67,105 +67,113 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": { "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 + "base_uri": "https://localhost:8080/" }, "id": "S22kTrfPlyhU", - "outputId": "e3fa4c42-57a5-4fa4-e43e-5b411a3c45fa" + "outputId": "71ee6d98-0c8d-41fa-d3b7-574021b403ea" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Processing ./structured-qa\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.7.0)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.27.1)\n", - "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.7.3)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (2.10.6)\n", - "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.0.17)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (6.0.2)\n", - "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (3.11.0)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (1.41.1)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev84+g7b9c96c) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2.32.3)\n", - "Requirement already 
satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev84+g7b9c96c) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev84+g7b9c96c) (2.27.2)\n", - "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev84+g7b9c96c) (1.25.2)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.10.2)\n", - "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (6.0.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.1.44)\n", - "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.9.1)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (6.3.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.23.0)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in 
/usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.0.2)\n", - "Requirement already satisfied: 
MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (24.3.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.36.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.17.0)\n", - "Building wheels for collected packages: structured-qa\n", + "Collecting fire (from structured-qa==0.3.3.dev86+gb726447)\n", + " Downloading fire-0.7.0.tar.gz (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev86+gb726447) (0.27.1)\n", + "Collecting loguru (from structured-qa==0.3.3.dev86+gb726447)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev86+gb726447) (2.10.6)\n", + "Collecting pymupdf4llm (from structured-qa==0.3.3.dev86+gb726447)\n", + " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev86+gb726447) (6.0.2)\n", + "Collecting rapidfuzz (from structured-qa==0.3.3.dev86+gb726447)\n", + " Downloading rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", + "Collecting streamlit (from structured-qa==0.3.3.dev86+gb726447)\n", + " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev86+gb726447) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev86+gb726447) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev86+gb726447) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev86+gb726447)\n", + " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev86+gb726447) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev86+gb726447)\n", + " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev86+gb726447)\n", + " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (6.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from 
altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (1.24.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev86+gb726447) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from 
gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev86+gb726447) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (25.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (1.17.0)\n", + "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", + "Downloading rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m63.5 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m68.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m68.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m61.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev84+g7b9c96c-py3-none-any.whl size=16325 sha256=3a2543903414e4e12121937c7c91c685062c83f3fc53f84a7316c8bec56b4181\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev86+gb726447-py3-none-any.whl size=16326 sha256=dd9419f56083c11c4717d20ca525f0f2ec3d4d42390326c036c361d00086a56f\n", " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", - "Successfully built structured-qa\n", - "Installing collected packages: structured-qa\n", - " Attempting uninstall: structured-qa\n", - " Found existing installation: structured-qa 0.3.3.dev84+g7b9c96c\n", - " Uninstalling structured-qa-0.3.3.dev84+g7b9c96c:\n", - " Successfully uninstalled structured-qa-0.3.3.dev84+g7b9c96c\n", - "Successfully installed structured-qa-0.3.3.dev84+g7b9c96c\n" + " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=a99cfbf78323f009ff3d061ed4ab77b8b8b52f29206689bd5fa5800c30096cfc\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + "Successfully built structured-qa fire\n", + "Installing collected packages: watchdog, rapidfuzz, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", + "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 rapidfuzz-3.11.0 streamlit-1.41.1 structured-qa-0.3.3.dev86+gb726447 watchdog-6.0.0\n" ] - }, - { - "data": { - "application/vnd.colab-display-data+json": { - "id": "df14365dea1e4a95896a43fb5764312a", - "pip_warning": { - "packages": [ - "structured_qa" - ] - } - } - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ @@ -174,27 +182,27 @@ }, { "cell_type": "code", - "execution_count": 6, + "source": [ + "%pip install --quiet 
https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" + ], "metadata": { + "id": "mZtwFXA5IOvn", + "outputId": "853d0379-9081-4447-c8a0-4a57267c9a48", "colab": { "base_uri": "https://localhost:8080/" - }, - "id": "mZtwFXA5IOvn", - "outputId": "c3b6fd2a-27f5-44e7-b5f6-b05bde51a979" + } }, + "execution_count": 3, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h" ] } - ], - "source": [ - "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" ] }, { @@ -208,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": { "id": "iJ812u2llyhV" }, @@ -221,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": { "id": "jWlaKC5qXZrh" }, @@ -241,12 +249,15 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": { "id": "oFU-eYMVlyhX" }, "outputs": [], "source": [ + "import time\n", + "\n", + "\n", "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", "You are a rigorous assistant answering questions.\n", "You only answer based on the current information available.\n", @@ -314,7 +325,7 @@ }, { 
"cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": { "id": "6RoEbYj3XZri" }, @@ -325,32 +336,32 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": { + "id": "ObsvwlNslyhZ", + "outputId": "6674c169-f578-4712-9c65-8480a8e3f8ed", "colab": { "base_uri": "https://localhost:8080/", - "height": 173, + "height": 153, "referenced_widgets": [ - "92e5c930e73d4fce9d0d79de5b12fbc3", - "4fbd860349a44d1faaaa708ee8d81cd0", - "1909a0309d9d4293a10da4abca6a4468", - "47f329f3d9c74758ad6e96ee76036b90", - "a96b9b499a11466da9b2efc8e55689c3", - "e1bfae86937042d8a105f04e3ccdb697", - "8733a460cd564e5f8b9177c4b37bc1d5", - "0e89d5adf51e49d6a7a5e720af8c0f3d", - "b4092c4c9e124f6bb3ec25fbda77044a", - "eaa38024ac24497080f3cff4ddc5b39b", - "a4935eb54fc2442fa243555f0b572ca0" + "00bbe62a72a64d7580010b8fdd1c9cd6", + "ab4af327ce664d9ba3240c66f33481cf", + "4f1c0bbaa5c5499a817a193bb7a4ecef", + "971414c1218e4de4a2973a4bf7943a81", + "6590c75ff1484ecd81d7db62b1b8e2ca", + "fd067a1b26c44ec4896bd2241fb7e3ce", + "ccbc263250264f31ab6398e96d7892d5", + "cd5b4abbbeb843e19f85387bc0522c33", + "a32d0ceeaf284c32aac7768df858e9e9", + "ff421a4ded21400a8e04c2c67fb5a03a", + "9b3a80c2cbc34259a8dc8b0b02c706ea" ] - }, - "id": "ObsvwlNslyhZ", - "outputId": "f8e3573d-30d7-4f57-d8eb-d430d42e3755" + } }, "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", @@ -361,18 +372,18 @@ ] }, { + "output_type": "display_data", "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "92e5c930e73d4fce9d0d79de5b12fbc3", - "version_major": 2, - "version_minor": 0 - }, "text/plain": [ "Qwen2.5-7B-Instruct-Q8_0.gguf: 0%| | 0.00/8.10G [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:00.449\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:00.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:01.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:01.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:01.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:01.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:01.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:02.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: EI30\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:02.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:02.363\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:02.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:02.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:03.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:03.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. 
-B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:03.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:03.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:04.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:04.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:04.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? 
-A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:04.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:04.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:04.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:05.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:05.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:05.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. 
-C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:05.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:05.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:05.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:05.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:05.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:06.241\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:06.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:06.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:07.570\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:07.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:07.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:07.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:08.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Number\n", + "\u001b[32m2025-01-30 10:01:01.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:01.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:01.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:02.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:02.916\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:03.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:03.038\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:03.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:03.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: EI30\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:03.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:03.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. 
-C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:04.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:04.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:04.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:04.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:05.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:05.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:05.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:05.509\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:05.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:05.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:05.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:05.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:06.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:06.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:06.527\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:07.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:07.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:07.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:07.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:07.219\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:07.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:07.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:07.597\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:08.860\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:08.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:08.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:08.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:10.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Number\n", "Question: How many parameters are in the toy model (y = x^2) tree?\n", "Answer: 14\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:08.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:08.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:08.895\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:09.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: Fewer operations\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:09.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:09.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:09.558\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:09.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:09.563\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:10.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:10.014\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:10.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:10.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:10.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:11.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Five\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:11.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:11.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:11.180\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:11.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - 
"\u001b[32m2025-01-29 13:02:12.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 100\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:12.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:12.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:12.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:12.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:12.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:12.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:12.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:12.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of 
thought?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:13.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:13.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:13.475\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:13.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:13.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:13.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:14.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:14.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:14.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What percentage is the daylight 
factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:14.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:14.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:14.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:15.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:15.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:15.558\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:15.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:18.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\n", - "Based on the information provided, the out-of-domain evaluation for symbolic reasoning tasks includes the coin flip task. The last letter concatenation task is described as a \"toy task\" with in-domain and out-of-domain evaluations, but the coin flip task is not mentioned in the context of a different task. Therefore, the correct answer is A: Coin Flip.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:18.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:18.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:18.855\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:18.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:18.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:18.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:18.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:20.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1024\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:20.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:20.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: I need more info.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:20.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:20.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:20.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:20.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:21.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 4000\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:21.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:21.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:21.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Pdrop = 0.1\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:21.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:21.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:22.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:22.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:22.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:22.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:22.789\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:22.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. 
-B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:23.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: It is cached for later use and to avoid recompilation.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:23.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:23.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:23.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:23.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:24.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:24.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:24.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In which type of parkings 
must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:24.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:24.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:24.507\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.012\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.599\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:25.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:26.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:26.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:26.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.283\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.396\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.777\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:27.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:28.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:28.232\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:28.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:28.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:28.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:28.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:29.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:29.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:29.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:30.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:30.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:30.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:31.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: Datasets ensuring quality and diversity\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:31.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:31.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:32.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:32.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", 
- "\u001b[32m2025-01-29 13:02:32.217\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 7\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:32.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:32.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:32.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:32.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:32.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:33.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:33.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a 
distant island?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:33.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:33.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:33.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:33.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:33.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:34.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:34.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:34.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:35.024\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:35.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:35.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:35.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:35.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:35.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:35.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:35.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: Player 1\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:35.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:35.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? 
-A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:37.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:37.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:37.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:37.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:37.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:37.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:37.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:37.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:38.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:38.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:38.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:38.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:38.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout 
phase?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:39.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 4\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:39.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:39.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:39.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:39.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:39.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:39.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:39.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:41.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:41.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:41.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:41.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:41.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:42.010\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 
3\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:42.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:42.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:43.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:43.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:43.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:43.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:43.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:43.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - 
\u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:44.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:44.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:44.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:45.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:45.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:45.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:45.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:45.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:46.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:46.535\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:46.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:46.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:46.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? 
-A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:46.882\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:46.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:46.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:47.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:47.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:47.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:47.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:47.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:48.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:48.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:48.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:49.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:49.959\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:50.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:50.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:50.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-01-29 13:02:50.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2015\u001b[0m\n" + "\u001b[32m2025-01-30 10:01:10.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:10.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:10.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the main 
computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:10.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:10.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:10.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:10.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:10.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:10.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:11.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:11.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:11.148\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:11.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:11.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:12.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Five\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:12.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:12.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:12.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:12.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:13.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 100\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:13.010\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:13.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:13.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:13.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:13.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:13.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:13.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:13.700\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:14.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", + "\u001b[32m2025-01-30 
10:01:14.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:14.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:14.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:14.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:14.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:14.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:14.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:14.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:15.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:15.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:15.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:16.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:16.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:16.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:16.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:19.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\n", + "Based on the information provided, the out-of-domain evaluation for symbolic reasoning tasks includes the coin flip task, as it mentions \"For last letter concatenation, the model only sees exemplars of names with two words, and then performs last letter concatenation on names with 3 and 4 words.4 We do the same for the number of potential flips in the coin flip task.\" This indicates that the coin flip task is used for out-of-domain evaluation.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:19.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:19.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:20.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:20.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:20.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:20.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:20.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:21.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1024\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:21.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:21.793\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: I need more info.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:21.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:21.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:22.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:22.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:22.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 4000\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:22.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:22.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:22.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Pdrop = 0.1\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:22.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:22.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:23.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:23.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:23.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:23.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:23.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:23.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. 
-B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:24.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: It is cached for later use and to avoid recompilation.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:24.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:24.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:24.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:24.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:25.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:25.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:25.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In which type of parkings 
must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:25.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:25.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:25.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:25.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:25.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.525\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:26.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:27.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:27.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:27.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:27.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:27.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:28.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:28.012\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:28.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:28.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:28.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:28.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:28.229\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:28.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:28.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:28.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:29.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:29.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:29.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:29.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:29.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:29.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:30.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:30.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:30.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:30.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:30.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:30.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:32.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: Datasets ensuring quality and diversity\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:32.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:32.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:32.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:32.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", 
+ "\u001b[32m2025-01-30 10:01:32.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 7\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:32.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:32.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:33.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: 10 years\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:33.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:33.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:34.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:34.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Do you need a fish to 
conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:34.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:34.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:34.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:34.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:34.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:34.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:34.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:34.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:35.509\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:35.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:35.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:35.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:35.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:36.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:36.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:36.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: Player 1\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:36.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:36.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? 
-A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:37.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:37.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:37.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:37.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:37.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:38.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:38.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:38.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:38.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:38.876\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:38.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:38.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:38.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout 
phase?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:39.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 4\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:39.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:39.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:39.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:39.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:40.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:40.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:40.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:41.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:41.577\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:41.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:41.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:41.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:41.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 
3\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:41.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:41.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:44.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: 7,500,000 EUR or 1% of annual turnover, whichever is higher.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:44.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:44.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:44.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:44.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:44.495\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:45.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:45.506\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:45.509\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:46.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:46.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:46.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:46.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:46.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:47.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:47.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:47.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:47.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:47.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? 
-A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:47.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:47.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:47.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:48.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:48.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:48.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:48.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:48.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:49.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:49.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:49.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:50.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:50.160\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:50.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:50.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:50.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-01-30 10:01:50.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2015\u001b[0m\n" ] } ], @@ -703,25 +714,67 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 1000 + "height": 332 }, "id": "EYYJgWf6lyha", - "outputId": "dc1b33dd-ee28-4f01-d7a4-9ba023aa8a73" + "outputId": "d909a39d-ca09-41de-dc7a-32903880211d" }, "outputs": [ { + "output_type": "execute_result", "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 16,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25,\n \"min\": 10,\n \"max\": 85,\n 
\"num_unique_values\": 16,\n \"samples\": [\n 10,\n 14,\n 42\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/1706.03762\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"EXPEDITION PHASE\",\n \"1.2.1. Internal partitions and doors\",\n \"5.4 Regularization\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 16,\n \"samples\": [\n \"What was the dropout rate used for the base model?\",\n \"How many parameters are in the toy model (y = x^2) tree?\",\n \"Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"NO\",\n \"14\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"NO\",\n \"A: EI30\",\n \"PDROP = 0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" - }, + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "14 14 https://arxiv.org/pdf/2210.05189 \n", + "38 38 https://arxiv.org/pdf/2201.11903 \n", + "43 43 https://arxiv.org/pdf/2201.11903 \n", + "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", + "51 51 https://github.com/mozilla-ai/structured-qa/re... \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "65 65 https://github.com/mozilla-ai/structured-qa/re... \n", + "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "\n", + " section \\\n", + "10 5.4 Regularization \n", + "14 3 Experimental Results \n", + "38 3.1 Experimental Setup \n", + "43 3.4 Robustness of Chain of Thought \n", + "47 CARD AND TILE EFFECTS \n", + "51 CHAPTER OVERVIEW \n", + "52 CARD AND TILE COSTS \n", + "65 EXPEDITION PHASE \n", + "78 5.2. Thread Hierarchy \n", + "\n", + " question answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 \n", + "14 How many parameters are in the toy model (y = ... 14 \n", + "38 How many large language models were evaluated? 5 \n", + "43 How many annotators provided independent chain... 
3 \n", + "47 How many different races are there? 6 \n", + "51 After taking a landmark tile, do you reveal a ... NO \n", + "52 Can a player pay coins to compensate for missi... YES \n", + "65 Do you need a fish to conquer a distant island? YES \n", + "78 Can you identify a thread with a four-dimensio... NO \n", + "\n", + " pred_answer pred_section \n", + "10 PDROP = 0.1 NaN \n", + "14 NUMBER\\nQUESTION: HOW MANY PARAMETERS ARE IN T... NaN \n", + "38 FIVE NaN \n", + "43 2 NaN \n", + "47 5 NaN \n", + "51 YES NaN \n", + "52 NO NaN \n", + "65 NO NaN \n", + "78 I NEED MORE INFO. NaN " + ], "text/html": [ "\n", - "
\n", + "
\n", "
\n", "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.037625.4 RegularizationWhat was the dropout rate used for the base mo...0.1PDROP= 0.1NaN
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Classification of general-purpose AI models as...What is the threshold, measured in floating po...CBNaN
2828https://eur-lex.europa.eu/legal-content/EN/TXT...Compliant AI systems which present a riskWhat is the time period for a market surveilla...CANaN
4444https://arxiv.org/pdf/2201.119033.2 ResultsHow many random samples were examined to under...10050NaN
4747https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?6NUMBER OF DIFFERENT RACES: 7NaN
5151https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
5353https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSIf a player is missing 2 skill symbols, how ma...2NO\\n- SINGLE LETTER (FOR MULTIPLE-CHOICE QUEST...NaN
5555https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSWhich type of cards provide coins? -A: Gray -B...BANaN
6565https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEDo you need a fish to conquer a distant island?YESNONaN
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"results\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17,\n \"min\": 10,\n \"max\": 65,\n \"num_unique_values\": 10,\n \"samples\": [\n 55,\n 22,\n 51\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"Classification of general-purpose AI models as general-purpose AI models with systemic risk\",\n \"CHAPTER OVERVIEW\",\n \"5.4 Regularization\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\",\n \"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\",\n \"After taking a landmark tile, do you reveal a new tile and the end of your turn?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"C\",\n \"YES\",\n \"0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"B\",\n \"YES\",\n \"PDROP= 0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 21 - } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "for index, result in results.iterrows():\n", - " if result[\"pred_answer\"].startswith((f\"-{result['answer']}\", f\"{result['answer']}\")):\n", - " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Shapes:\n", + "encodings: torch.Size([56, 508, 128])\n", + "doc_masks: torch.Size([56, 508])\n", + "Documents encoded!\n" + ] }, { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "c4z9XxXWq3m1", - "outputId": "29f0fe17-85ce-4792-f6c5-08a8b8e2c6b3" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0.898989898989899" - ] - }, - "metadata": {}, - "execution_count": 22 - } - ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] + "name": "stderr", + "output_type": 
"stream", + "text": [ + "\u001b[32m2025-01-30 12:57:53.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:53.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:53.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mAt each step the model is auto-regressive\n", + "[10], consuming the previously generated symbols as additi\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:54.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1m6\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:54.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:54.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mTo facilitate these residual connections, all sub-layers in the model, as well as the embedding\n", + "laye\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:54.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1m6\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:54.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:54.910\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mIn this work we employ h= 8 parallel attention layers, or heads. For each of these we use\n", + "dk=dv=dmod\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:55.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1m8\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:55.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:55.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m3.3 Position-wise Feed-Forward Networks\n", + "In addition to attention sub-layers, each of the layers in o\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:56.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:56.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:56.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mTo this end, we add \"positional encodings\" to the input embeddings at the\n", + "bottoms of the encoder and\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:57.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:57.528\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:57.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mSentence pairs were batched together by approximate sequence length. Each training\n", + "batch contained a\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:58.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1m8\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:58.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:58.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mSentence pairs were batched together by approximate sequence length. Each training\n", + "batch contained a\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:58.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:58.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:58.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mThe big models were trained for 300,000 steps\n", + "(3.5 days).\n", + "5.3 Optimizer\n", + "We used the Adam optimizer [\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:59.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:59.793\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-01-30 12:57:59.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mThe big models were trained for 300,000 steps\n", + "(3.5 days).\n", + "5.3 Optimizer\n", + "We used the Adam optimizer [\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:00.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1m4000\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:00.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:00.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m36 41.29 7.7·10191.2·1021\n", + "Transformer (base model) 27.3 38.1 3.3·1018\n", + "Transformer (big) 28.4 41.8 2.\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:01.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mPdrop= 0.1\u001b[0m\n", + 
"\u001b[32m2025-01-30 12:58:01.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:01.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:01.789\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 137 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:15.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:15.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 199 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/7 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:26.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:26.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 44 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:35.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:35.286\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 143 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:46.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:46.051\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 364 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/12 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:59.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:58:59.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 1803 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r 0%| | 0/57 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", + "\u001b[32m2025-01-30 12:59:42.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689 to ?uri=OJ:L_202401689.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 12:59:42.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 754 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/24 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:02.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:02.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 17 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/1 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:22.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:22.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", + "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " self.scaler = torch.cuda.amp.GradScaler()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Encoding 48 documents...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/2 [00:00Discard any remaining, face-up Island cards and reveal new ones.\n", + " >Pass the First player marker to \u001b[0m\n", + "\u001b[32m2025-01-30 13:00:29.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1m25\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:29.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:29.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m>There is no limit to the number, type, or order of \n", + "actions a player may take during the Action pha\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:30.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:30.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:30.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mRations are needed for the long journey!\n", + "A player can choose to Pillage a selected Island card with\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:30.993\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:30.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:31.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mEach \n", + "action draws the clans closer to becoming the greatest empire! The \n", + "game ends in the same roun\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:31.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1m1\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:31.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:31.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mGAME FLOW\n", + "Note for Imperial Settlers fans \n", + "You cannot Spend 2 Workers \n", + "to get a Resource or a card.\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:32.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:32.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:32.766\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mIMPORT ANT: Some Field Locations work only as upgrades. These Fields have \n", + "the Resources on the righ\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:33.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1m1\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:33.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:33.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mNOTE 2: Some abilities in the \n", + "game have a ‘/’ divider between \n", + "presented choices. This should be \n", + "t\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:34.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:34.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:34.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mThus allowing a player to play \n", + "a single Boost card or build a single Field Location before resolvin\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:35.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:35.448\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:35.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", + "(Gold tokens assigned to cards are\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:36.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:36.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:36.375\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", + "(Gold tokens assigned to cards are\u001b[0m\n", + "\u001b[32m2025-01-30 13:00:37.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m58\u001b[0m - \u001b[1mA\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = 
Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "mltqL7Bhq3m1", + "outputId": "7779f94c-b541-4463-98d5-17e049981a40" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17,\n \"min\": 10,\n \"max\": 65,\n \"num_unique_values\": 10,\n \"samples\": [\n 55,\n 22,\n 51\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"Classification of general-purpose AI models as general-purpose AI models with systemic risk\",\n \"CHAPTER OVERVIEW\",\n \"5.4 Regularization\"\n ],\n \"semantic_type\": \"\",\n \"description\": 
\"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\",\n \"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\",\n \"After taking a landmark tile, do you reveal a new tile and the end of your turn?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"C\",\n \"YES\",\n \"0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"B\",\n \"YES\",\n \"PDROP= 0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" }, - "outputs": [], - "source": [] + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.037625.4 RegularizationWhat was the dropout rate used for the base mo...0.1PDROP= 0.1NaN
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Classification of general-purpose AI models as...What is the threshold, measured in floating po...CBNaN
2828https://eur-lex.europa.eu/legal-content/EN/TXT...Compliant AI systems which present a riskWhat is the time period for a market surveilla...CANaN
4444https://arxiv.org/pdf/2201.119033.2 ResultsHow many random samples were examined to under...10050NaN
4747https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?63NaN
5151https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
5353https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSIf a player is missing 2 skill symbols, how ma...2NO\\n- SINGLE LETTER (FOR MULTIPLE-CHOICE QUEST...NaN
5555https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSWhich type of cards provide coins? -A: Gray -B...BANaN
6565https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEDo you need a fish to conquer a distant island?YESNONaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "28 28 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "44 44 https://arxiv.org/pdf/2201.11903 \n", + "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", + "51 51 https://github.com/mozilla-ai/structured-qa/re... \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "53 53 https://github.com/mozilla-ai/structured-qa/re... \n", + "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", + "65 65 https://github.com/mozilla-ai/structured-qa/re... \n", + "\n", + " section \\\n", + "10 5.4 Regularization \n", + "22 Classification of general-purpose AI models as... \n", + "28 Compliant AI systems which present a risk \n", + "44 3.2 Results \n", + "47 CARD AND TILE EFFECTS \n", + "51 CHAPTER OVERVIEW \n", + "52 CARD AND TILE COSTS \n", + "53 CARD AND TILE COSTS \n", + "55 CARD AND TILE EFFECTS \n", + "65 EXPEDITION PHASE \n", + "\n", + " question answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 \n", + "22 What is the threshold, measured in floating po... C \n", + "28 What is the time period for a market surveilla... C \n", + "44 How many random samples were examined to under... 100 \n", + "47 How many different races are there? 6 \n", + "51 After taking a landmark tile, do you reveal a ... NO \n", + "52 Can a player pay coins to compensate for missi... YES \n", + "53 If a player is missing 2 skill symbols, how ma... 2 \n", + "55 Which type of cards provide coins? -A: Gray -B... B \n", + "65 Do you need a fish to conquer a distant island? YES \n", + "\n", + " pred_answer pred_section \n", + "10 PDROP= 0.1 NaN \n", + "22 B NaN \n", + "28 A NaN \n", + "44 50 NaN \n", + "47 3 NaN \n", + "51 YES NaN \n", + "52 NO NaN \n", + "53 NO\\n- SINGLE LETTER (FOR MULTIPLE-CHOICE QUEST... 
NaN \n", + "55 A NaN \n", + "65 NO NaN " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" } - ], - "metadata": { - "accelerator": "GPU", + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " if result[\"pred_answer\"].startswith(\n", + " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", + " ):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.12" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "85021e2e71eb4797b12938e40dd520ca": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_034aa03bfb4a40b995abac507b730fb0", - "IPY_MODEL_ed0530d56c834c3396a29f6ace0d0002", - "IPY_MODEL_9ac90a1242fd4538a0ec300ad6c8580d" - ], - "layout": "IPY_MODEL_480005206d7a4d8ba5345d1fb7176d21" - } - }, - "034aa03bfb4a40b995abac507b730fb0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - 
"description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0ce54b97711e42b6a543c98df81bf183", - "placeholder": "​", - "style": "IPY_MODEL_fcc1a1157c964fd3952f2ceeb75380fb", - "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" - } - }, - "ed0530d56c834c3396a29f6ace0d0002": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ced7a587317f4befbe2b163b2c14b446", - "max": 8098525888, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_69a48a84064f47d8bf5f4d111dda9400", - "value": 8098525888 - } - }, - "9ac90a1242fd4538a0ec300ad6c8580d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7f643e8e8bce41609b418af53b374174", - "placeholder": "​", - "style": "IPY_MODEL_b6bd4737fa764ae2aa3112d76d78d699", - "value": " 8.10G/8.10G [03:12<00:00, 41.5MB/s]" - } - }, - "480005206d7a4d8ba5345d1fb7176d21": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0ce54b97711e42b6a543c98df81bf183": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - 
"order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fcc1a1157c964fd3952f2ceeb75380fb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ced7a587317f4befbe2b163b2c14b446": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "69a48a84064f47d8bf5f4d111dda9400": { - "model_module": "@jupyter-widgets/controls", - "model_name": 
"ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "7f643e8e8bce41609b418af53b374174": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b6bd4737fa764ae2aa3112d76d78d699": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - 
"_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } + "base_uri": "https://localhost:8080/" + }, + "id": "c4z9XxXWq3m1", + "outputId": "ed00666d-0e36-4e5c-9b62-a2ea14c441cc" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.898989898989899" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "UXg_TC7R28QI" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "00b65fb9908f4e1dbd09b8cc59235605": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "02036bfb1d074b4d89c89bb5faae004a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0369b053c6424fd7bad357433df8dc85": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"05fd61c2436242619810c829734411ac": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_edcb83aee99b4d67a3e5c1e3f73e418f", + "placeholder": "​", + "style": "IPY_MODEL_7f115f13f6bd40ff8273c8f70c888e9d", + "value": " 232k/232k [00:00<00:00, 1.09MB/s]" + } + }, + "07c171affc4b4c548b79dd1153d3150b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0ea58730ba05467dba2f60fea0f1eddc": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_07c171affc4b4c548b79dd1153d3150b", + "max": 1633, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ade3b5dbacd1438380933cac3eb81800", + "value": 1633 + } + }, + "10d52f98ecc6417d96cf53cac822c9c6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "16d485c3159b44efaff7e6392a73ed11": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_02036bfb1d074b4d89c89bb5faae004a", + "placeholder": "​", + "style": "IPY_MODEL_2a99d23d260e41b491d6cefdad20e08a", + "value": " 743/743 [00:00<00:00, 53.7kB/s]" + } + }, + "1e00c5a5c32b4708a12ac145a2a08a94": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "20461602ac2647a4a8607f89e895cbd1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "25ad96bfb5ba48258320596a2b65c9c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c566d7af0eb14e4db6f53b2426cbe5da", + "IPY_MODEL_c9f1003b9ea747a78bde82411de091ba", + "IPY_MODEL_38e676d196d14126b306cf95e83f9a14" + ], + "layout": "IPY_MODEL_effc2dbaafe8467ca8bc0e151a72566f" + } + }, + "2612c23e742949a0ae3f37c1acbe54a9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_56c24d6c3bb84ebeb8e63ae7685b622c", + "placeholder": "​", + "style": "IPY_MODEL_d605bbfa7de945ff8f2908fc601d786d", + "value": "vocab.txt: 100%" + } + }, + "2913682df125489d8dabe312893c96bc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_fdf81aa2e92d4c3c998798e820d5c3c8", + "IPY_MODEL_3c3166f5f51946939a03ada7d6ea951a", + "IPY_MODEL_ec00a4a5db4c4f6dbc9ffd6064534d6e" + ], + "layout": "IPY_MODEL_a6fd770f03f440c5859d1fe2e49351d4" + } + }, + "2a54cabb44c041d08863c2985f58307d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": 
"1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4b82be23d7174558821158562380f29b", + "IPY_MODEL_0ea58730ba05467dba2f60fea0f1eddc", + "IPY_MODEL_fe986e4b2af14539ac7c8ebd9693579a" + ], + "layout": "IPY_MODEL_b2d198b0d4a94766a2344f5aa2748e8c" + } + }, + "2a99d23d260e41b491d6cefdad20e08a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2f8c15ff6cac41fca08cbd7e1e3a1620": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2612c23e742949a0ae3f37c1acbe54a9", + "IPY_MODEL_b7fcea6c5c624801ba709bcdd02bb0de", + "IPY_MODEL_05fd61c2436242619810c829734411ac" + ], + "layout": "IPY_MODEL_9a7fffad5ad0476d95b78b722a56e757" + } + }, + "307c70a3dfbd4c4c97b1f9797c935b50": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "37e061fb7a704c40bd0ee66b3091853b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "38a552b48b91413183504730b900d0bc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "38e676d196d14126b306cf95e83f9a14": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_42f729466b3a401bb6f1f6079b369e41", + "placeholder": "​", + "style": "IPY_MODEL_00b65fb9908f4e1dbd09b8cc59235605", + "value": " 466k/466k [00:00<00:00, 974kB/s]" + } + }, + "3addfa7153ad4674aad4bb8d32268e7e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": 
null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3aecfd22fa5c48bc8ebca076cd802f4c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_89a7175a759e4722a74060d98128487c", + "IPY_MODEL_f4baaaa3aadc4bbb9e2f0087ce6273b1", + "IPY_MODEL_f2c9cf22f1ec47d18dbbd14fd60bd525" + ], + "layout": "IPY_MODEL_3addfa7153ad4674aad4bb8d32268e7e" + } + }, + "3c3166f5f51946939a03ada7d6ea951a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_307c70a3dfbd4c4c97b1f9797c935b50", + "max": 405, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9211cb4be41d47d7a6dede4b51621111", + "value": 405 + } + }, + "42d0723e2451483f8786a6522680a7c0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "42f729466b3a401bb6f1f6079b369e41": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": 
null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4a2c7268f067428a950351f564432f4e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4b82be23d7174558821158562380f29b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_a9fba03ff33048ce92eef9452837999c", + "placeholder": "​", + "style": "IPY_MODEL_37e061fb7a704c40bd0ee66b3091853b", + "value": "artifact.metadata: 100%" + } + }, + "4be9ea11e9c54626810d6d3ff5d073b8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4fae803d8c4f45e9aec71bf7fd1bde01": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "53758cdbe1b0452cafdd55640b1165d6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": 
"1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "56c24d6c3bb84ebeb8e63ae7685b622c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5ade4f272cc346f9aac412049a3d13e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "66c946c5b1434087a3c838feabff07c5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "687cf1258e2b47ffb1a169307f78cd61": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "68c078f715454f3c953787aeb38fe1be": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7562207c53ac41e48daafbd9518ea580": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7a2bbc79b1ba4dacad15b290e29e869f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7f115f13f6bd40ff8273c8f70c888e9d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "81d5f2d59dc2425a87ea419b345ab988": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": 
null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "86ed0f5dbf0a4cd2855d2e635ee68aaf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "87b3d8d920544fd2b8d1eae105441ef9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8923512c87c244c19783ff200b4d5454": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_87b3d8d920544fd2b8d1eae105441ef9", + "max": 8098525888, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8a7b840fb46c434d8ebe6bbe5c69a044", + "value": 8098525888 + } + }, + "89a7175a759e4722a74060d98128487c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_38a552b48b91413183504730b900d0bc", + "placeholder": "​", + "style": "IPY_MODEL_cd93f50f87cb4169b08110f03545f8bd", + "value": "model.safetensors: 100%" + } + }, + "8a7b840fb46c434d8ebe6bbe5c69a044": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8c4849cdd00540519c18855be4376548": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": 
null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8ec927e652084cc0bf5b513275f5efc3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8f553121841441ba8dcd4fc19cadd037": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b99313423b894fb193eb77aa638b0422", + "placeholder": "​", + "style": "IPY_MODEL_c8a7f2ff55d34102a19189e23feb3ae4", + "value": " 112/112 [00:00<00:00, 7.14kB/s]" + } + }, + "9211cb4be41d47d7a6dede4b51621111": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9a7fffad5ad0476d95b78b722a56e757": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a6fd770f03f440c5859d1fe2e49351d4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": 
null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a9fba03ff33048ce92eef9452837999c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "abace962a40b49cd8d21047f0c961620": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + 
"IPY_MODEL_c021f82bc25341e7a10112750316a37b", + "IPY_MODEL_e826fb0e17594e3e9c1cb81c7848b636", + "IPY_MODEL_16d485c3159b44efaff7e6392a73ed11" + ], + "layout": "IPY_MODEL_4be9ea11e9c54626810d6d3ff5d073b8" + } + }, + "ade3b5dbacd1438380933cac3eb81800": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b1a9887c49774487849bd47ae9edc927": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_42d0723e2451483f8786a6522680a7c0", + "placeholder": "​", + "style": "IPY_MODEL_53758cdbe1b0452cafdd55640b1165d6", + "value": "special_tokens_map.json: 100%" + } + }, + "b2d198b0d4a94766a2344f5aa2748e8c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b5d106d794bd4aaa9cbfed7c8f98b1ab": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b7fcea6c5c624801ba709bcdd02bb0de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_10d52f98ecc6417d96cf53cac822c9c6", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7562207c53ac41e48daafbd9518ea580", + "value": 231508 + } + }, + "b99313423b894fb193eb77aa638b0422": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c021f82bc25341e7a10112750316a37b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_86ed0f5dbf0a4cd2855d2e635ee68aaf", + "placeholder": "​", + "style": "IPY_MODEL_ffcd545025454565866309ab2028a986", + "value": "config.json: 100%" + } + }, + "c566d7af0eb14e4db6f53b2426cbe5da": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8c4849cdd00540519c18855be4376548", + "placeholder": "​", + "style": "IPY_MODEL_c5bd24a20d4645c0937c2a4811da73fc", + "value": "tokenizer.json: 100%" + } + }, + "c5bd24a20d4645c0937c2a4811da73fc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c8a7f2ff55d34102a19189e23feb3ae4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c9f1003b9ea747a78bde82411de091ba": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ced0e4662f0e42fa8b82775f50efd15b", + "max": 466081, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8ec927e652084cc0bf5b513275f5efc3", + "value": 466081 + } + }, + "cd93f50f87cb4169b08110f03545f8bd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cd9a8d967f97499fb4bc77f945234dfe": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_66c946c5b1434087a3c838feabff07c5", + "placeholder": "​", + "style": "IPY_MODEL_e47b2666d47347639b264347c2f87ba2", + "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" + 
} + }, + "cdac7b7c8a464aa6bea2d3bdd1106479": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ced0e4662f0e42fa8b82775f50efd15b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d13d60c460fd46468bd1aacb42a50939": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_cd9a8d967f97499fb4bc77f945234dfe", + "IPY_MODEL_8923512c87c244c19783ff200b4d5454", + "IPY_MODEL_f7317270a3c941bfa7c147aa19ae6111" + ], + "layout": "IPY_MODEL_68c078f715454f3c953787aeb38fe1be" + } + }, + "d605bbfa7de945ff8f2908fc601d786d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d8f3f0b7a0384097a217fd77d319a966": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", 
+ "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e47b2666d47347639b264347c2f87ba2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e5bdc52a220c42199cf97208f8fcc367": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e826fb0e17594e3e9c1cb81c7848b636": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d8f3f0b7a0384097a217fd77d319a966", + "max": 743, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f674f0b58eca4813bebfb092b7732d31", + "value": 743 + } + }, + "e99e3a300b2742c39484c9d403f6c4fb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ec00a4a5db4c4f6dbc9ffd6064534d6e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4a2c7268f067428a950351f564432f4e", + "placeholder": "​", + "style": "IPY_MODEL_4fae803d8c4f45e9aec71bf7fd1bde01", + "value": " 405/405 [00:00<00:00, 24.6kB/s]" + } + }, + "ec77e97cd7dc4a6f92f4fb7be8e807f7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "edcb83aee99b4d67a3e5c1e3f73e418f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": 
null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ee1a98f082e84f57913a8c77d1b46cbb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b1a9887c49774487849bd47ae9edc927", + "IPY_MODEL_f77e53f86c10449d800035b20394067b", + "IPY_MODEL_8f553121841441ba8dcd4fc19cadd037" + ], + "layout": "IPY_MODEL_ff78ef1f30db4046a58f313e4488b55c" + } + }, + "effc2dbaafe8467ca8bc0e151a72566f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + 
"overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f2c9cf22f1ec47d18dbbd14fd60bd525": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_687cf1258e2b47ffb1a169307f78cd61", + "placeholder": "​", + "style": "IPY_MODEL_20461602ac2647a4a8607f89e895cbd1", + "value": " 438M/438M [00:04<00:00, 176MB/s]" + } + }, + "f4baaaa3aadc4bbb9e2f0087ce6273b1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cdac7b7c8a464aa6bea2d3bdd1106479", + "max": 438349816, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1e00c5a5c32b4708a12ac145a2a08a94", + "value": 438349816 + } + }, + "f674f0b58eca4813bebfb092b7732d31": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f7317270a3c941bfa7c147aa19ae6111": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ec77e97cd7dc4a6f92f4fb7be8e807f7", + "placeholder": "​", + "style": "IPY_MODEL_e99e3a300b2742c39484c9d403f6c4fb", + "value": " 8.10G/8.10G [03:14<00:00, 42.5MB/s]" + } + }, + "f77e53f86c10449d800035b20394067b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b5d106d794bd4aaa9cbfed7c8f98b1ab", + "max": 112, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e5bdc52a220c42199cf97208f8fcc367", + "value": 112 + } + }, + "fdf81aa2e92d4c3c998798e820d5c3c8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_81d5f2d59dc2425a87ea419b345ab988", + "placeholder": "​", + "style": "IPY_MODEL_5ade4f272cc346f9aac412049a3d13e2", + "value": "tokenizer_config.json: 100%" + } + }, + "fe986e4b2af14539ac7c8ebd9693579a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0369b053c6424fd7bad357433df8dc85", + "placeholder": "​", + "style": "IPY_MODEL_7a2bbc79b1ba4dacad15b290e29e869f", + "value": " 1.63k/1.63k [00:00<00:00, 130kB/s]" + } + }, + "ff78ef1f30db4046a58f313e4488b55c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": 
null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ffcd545025454565866309ab2028a986": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/benchmark/qwen_2_5_7B_perfect_context.ipynb b/benchmark/qwen_2_5_7B_perfect_context.ipynb index 5cd284c..45f32d9 100644 --- a/benchmark/qwen_2_5_7B_perfect_context.ipynb +++ b/benchmark/qwen_2_5_7B_perfect_context.ipynb @@ -1,1516 +1,1473 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "oD2lVadPlyhR" - }, - "source": [ - "# Structured Q&A" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JZeb4ABvlyhS" - }, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "77aI1i7vlyhS" - }, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "atlPXFshlyhS" - }, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "QrgOGtuGlyhT", - "outputId": "23c329f7-3cea-49af-b969-1bad857de7f1" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cloning into 'structured-qa'...\n", - "remote: Enumerating objects: 803, done.\u001b[K\n", - "remote: Counting objects: 100% (241/241), done.\u001b[K\n", - "remote: Compressing objects: 100% (137/137), done.\u001b[K\n", - "remote: Total 803 (delta 154), 
reused 133 (delta 97), pack-reused 562 (from 1)\u001b[K\n", - "Receiving objects: 100% (803/803), 2.29 MiB | 28.93 MiB/s, done.\n", - "Resolving deltas: 100% (436/436), done.\n" - ] - } - ], - "source": [ - "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "S22kTrfPlyhU", - "outputId": "71ee6d98-0c8d-41fa-d3b7-574021b403ea" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing ./structured-qa\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting fire (from structured-qa==0.3.3.dev86+gb726447)\n", - " Downloading fire-0.7.0.tar.gz (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev86+gb726447) (0.27.1)\n", - "Collecting loguru (from structured-qa==0.3.3.dev86+gb726447)\n", - " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev86+gb726447) (2.10.6)\n", - "Collecting pymupdf4llm (from structured-qa==0.3.3.dev86+gb726447)\n", - " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev86+gb726447) (6.0.2)\n", - "Collecting rapidfuzz (from structured-qa==0.3.3.dev86+gb726447)\n", - " Downloading rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", - "Collecting streamlit (from structured-qa==0.3.3.dev86+gb726447)\n", - " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev86+gb726447) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev86+gb726447) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev86+gb726447) (2.27.2)\n", - "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev86+gb726447)\n", - " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev86+gb726447) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (0.10.2)\n", - "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev86+gb726447)\n", - " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (3.1.44)\n", - "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev86+gb726447)\n", - " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev86+gb726447) (6.3.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from 
altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (1.24.0)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev86+gb726447) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev86+gb726447) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from 
gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev86+gb726447) (5.0.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (25.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev86+gb726447) (1.17.0)\n", - "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", - "Downloading rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m63.5 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m68.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m68.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m61.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", - " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev86+gb726447-py3-none-any.whl size=16326 sha256=dd9419f56083c11c4717d20ca525f0f2ec3d4d42390326c036c361d00086a56f\n", - " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", - " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=a99cfbf78323f009ff3d061ed4ab77b8b8b52f29206689bd5fa5800c30096cfc\n", - " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", - "Successfully built structured-qa fire\n", - "Installing collected packages: watchdog, rapidfuzz, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", - "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 rapidfuzz-3.11.0 streamlit-1.41.1 structured-qa-0.3.3.dev86+gb726447 watchdog-6.0.0\n" - ] - } - ], - "source": [ - "%pip install ./structured-qa" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oD2lVadPlyhR" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JZeb4ABvlyhS" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77aI1i7vlyhS" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "atlPXFshlyhS" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "QrgOGtuGlyhT", + "outputId": "7d9da451-699e-46bf-f14f-862128abbaf1" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "mZtwFXA5IOvn", - "outputId": "853d0379-9081-4447-c8a0-4a57267c9a48" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h" - ] - } - ], - "source": [ - "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into 'structured-qa'...\n", + "remote: Enumerating objects: 823, done.\u001b[K\n", + "remote: Counting objects: 100% (261/261), done.\u001b[K\n", + "remote: Compressing objects: 100% (152/152), done.\u001b[K\n", + "remote: Total 823 (delta 169), reused 143 (delta 102), pack-reused 562 (from 1)\u001b[K\n", + "Receiving objects: 100% (823/823), 2.39 MiB | 5.13 MiB/s, done.\n", + "Resolving deltas: 100% (451/451), done.\n" + ] + } + ], + "source": [ + "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "S22kTrfPlyhU", + "outputId": "2bf94fb7-dcc4-413f-da2e-93bc4fc832e9" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "qwHWJEsulyhV" - }, - "source": [ - "# Setup" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing ./structured-qa\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting fire (from structured-qa==0.3.3.dev90+gd12fa72)\n", + " Downloading fire-0.7.0.tar.gz (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev90+gd12fa72) (0.27.1)\n", + "Collecting loguru (from structured-qa==0.3.3.dev90+gd12fa72)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev90+gd12fa72) (2.10.6)\n", + "Collecting pymupdf4llm (from structured-qa==0.3.3.dev90+gd12fa72)\n", + " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev90+gd12fa72) (6.0.2)\n", + "Collecting rapidfuzz (from structured-qa==0.3.3.dev90+gd12fa72)\n", + " Downloading rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", + "Collecting streamlit (from structured-qa==0.3.3.dev90+gd12fa72)\n", + " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev90+gd12fa72) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev90+gd12fa72) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev90+gd12fa72) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev90+gd12fa72) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev90+gd12fa72) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev90+gd12fa72) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev90+gd12fa72) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev90+gd12fa72) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev90+gd12fa72) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev90+gd12fa72)\n", + " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev90+gd12fa72) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev90+gd12fa72)\n", + " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev90+gd12fa72)\n", + " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev90+gd12fa72) (6.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from 
altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (1.24.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev90+gd12fa72) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev90+gd12fa72) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev90+gd12fa72) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev90+gd12fa72) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from 
gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (25.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev90+gd12fa72) (1.17.0)\n", + "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", + "Downloading rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m62.9 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m117.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m120.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m93.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", + " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev90+gd12fa72-py3-none-any.whl size=16325 sha256=2bc896037f77afbd304759ce187b8fcc29572a39f0f67cb4d0185c0c227fb312\n", + " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", + " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=dbd6eab2c337013f10770b03fd003525cb35432b52582d926da5d7fd9df2619c\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + "Successfully built structured-qa fire\n", + "Installing collected packages: watchdog, rapidfuzz, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", + "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 rapidfuzz-3.11.0 streamlit-1.41.1 structured-qa-0.3.3.dev90+gd12fa72 watchdog-6.0.0\n" + ] + } + ], + "source": [ + "%pip install ./structured-qa" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "mZtwFXA5IOvn", + "outputId": "03cf5cb1-8bd5-49af-c32e-9d420dc14fd9" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "iJ812u2llyhV" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ], + "source": [ + "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + 
"\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "jWlaKC5qXZrh" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process all questions for a single Section" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You only answer based on the current information available.\n", + "The current information available is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "The answer must return ONLY one of the following strings and nothing else:\n", + "- YES/NO (for boolean questions)\n", + "Is the model an LLM?\n", + "YES\n", + "- Number (for numeric questions)\n", + "How many layers does the model have?\n", + "12\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", + "C\n", + "\"\"\"\n", + "\n", + "\n", + "def process_section_questions(\n", + " section_file,\n", + " section_data,\n", + " model,\n", + "):\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in section_data.iterrows():\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " messages = [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": ANSWER_WITH_TYPE_PROMPT.format(\n", + " CURRENT_INFO=section_file.read_text()\n", + " ),\n", + " },\n", + " {\"role\": \"user\", \"content\": question},\n", + " ]\n", + " response = model.get_response(messages)\n", + " logger.info(f\"Answer: {response}\")\n", + " answers[index] = response\n", + " sections[index] = None\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "6RoEbYj3XZri" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_llama_cpp_model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 156, + "referenced_widgets": [ + "970fb4579dab4f0e96f2efe052f0a463", + "397f1a99a7cf41599453f936b207a44a", + "e80453d93a614c5885d0a8270db53e7a", + "dc3fdea0c6cf45239f995b029219b664", + "f3534030a918419fa1687b6407ff002e", + "3459132c36e6472ba415aca0de6fe3e5", + "0b95e6210f4f4788a718c652cd5593c4", + "de44370268a441c48c7b81fc0b138c22", + "8ca481ba99984fb2b9d87f02813c7408", + "c31f992a506241ea94e254189dc696f1", + "87bfd1ae753e4586a139a988ae2c4601" + ] }, + "id": "ObsvwlNslyhZ", + "outputId": "8bd302b4-628d-4884-934d-d9c5782fe761" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "jWlaKC5qXZrh" - }, - "outputs": [], - "source": [ - "from loguru import logger" - ] + 
"name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "MKijHC_ClyhX" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "970fb4579dab4f0e96f2efe052f0a463", + "version_major": 2, + "version_minor": 0 }, - "source": [ - "## Function to Process all questions for a single Section" + "text/plain": [ + "Qwen2.5-7B-Instruct-Q8_0.gguf: 0%| | 0.00/8.10G [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:07.811\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:07.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:09.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:09.570\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full 
fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:09.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:09.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:09.716\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:10.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:10.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:10.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:10.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:10.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? 
-A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:10.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:10.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:10.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:10.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:11.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:11.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:11.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:11.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:11.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:11.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:11.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:11.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:11.979\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:12.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:12.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:12.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:12.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:12.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:12.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:12.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:12.920\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:14.219\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:14.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:14.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:14.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:14.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 14\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:14.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:14.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:14.682\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What is the 
main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:14.807\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:14.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:14.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:15.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:15.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:15.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:15.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:15.577\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:15.682\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:15.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:15.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:16.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:16.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:16.665\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:16.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:16.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:17.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 100\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:17.524\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:17.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:17.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:17.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:17.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:18.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:18.181\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:18.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:18.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", + "\u001b[32m2025-01-30 
12:40:18.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:18.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:19.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:19.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:19.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:19.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:19.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:19.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:19.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:19.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:19.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:20.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:20.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:20.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:20.562\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:20.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:20.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:20.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:20.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:20.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:21.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:21.024\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:21.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:22.241\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 1024\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:22.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:22.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:22.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:22.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:22.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:22.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:22.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 4000\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:22.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:22.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:23.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: Pdrop = 0.1\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:23.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:23.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:23.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:23.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. 
-B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.389\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.804\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:24.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? 
-A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:25.308\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:25.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:25.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:25.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:25.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:25.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:25.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:25.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:26.293\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:26.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:26.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:26.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:26.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:26.506\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:26.615\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:26.618\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:26.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:27.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:27.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:27.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:27.665\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:27.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:27.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:27.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:27.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:27.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:27.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:28.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:28.003\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:28.339\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:28.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:28.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:28.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:28.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:28.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:29.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:29.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:29.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:30.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:30.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:30.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:30.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:30.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:30.609\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:31.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:31.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:31.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:32.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:32.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-01-30 
12:40:32.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 7\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:32.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:32.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:32.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:32.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:32.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:33.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:33.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant 
island?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:33.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:33.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:33.386\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:33.389\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:33.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:34.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:34.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:34.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:34.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:34.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:34.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:34.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:34.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:35.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:35.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:35.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:35.267\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:35.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? 
-A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:36.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:36.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:36.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:36.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:36.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:36.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:36.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:36.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:37.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:37.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:37.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:37.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:37.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout 
phase?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:38.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 4\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:38.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:38.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:38.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:38.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:38.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:38.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:38.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:40.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:40.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:40.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:40.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:40.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:40.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 
3\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:40.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:40.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:41.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:41.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:41.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:42.219\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:42.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:42.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - 
\u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:43.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:43.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:43.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:43.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: -B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:43.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:43.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:43.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:43.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:44.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:44.684\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:44.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:44.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:44.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? 
-A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:44.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:44.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:44.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:45.654\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:45.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:45.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:45.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:45.793\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:46.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:46.861\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:46.864\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:47.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:47.789\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:48.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: -B: Copyright Office and Library of Congress’ Copyright Registration Guidance.\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:48.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:48.389\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-01-30 12:40:48.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mAnswer: 2015\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for section_name, section_data in data.groupby(\"section\"):\n", + " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", + "\n", + " answers, sections = process_section_questions(section_file, section_data, 
model)\n", + "\n", + " for index in section_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 }, + "id": "EYYJgWf6lyha", + "outputId": "7ca03f34-74e2-4957-d3c2-8dd0cd48c3af" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "VQAof5xtlyhY" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 18,\n \"min\": 10,\n \"max\": 65,\n \"num_unique_values\": 6,\n \"samples\": [\n 10,\n 43,\n 65\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/1706.03762\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"5.4 Regularization\",\n \"3.4 Robustness of Chain of Thought\",\n \"EXPEDITION PHASE\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"What was the dropout rate used for the base model?\",\n \"How many annotators provided independent chains of thought?\",\n \"Do you need a fish to conquer a distant island?\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"3\",\n \"YES\",\n \"6\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"2\",\n \"NO\",\n \"5\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" }, - "source": [ - "## Load Model" + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.037625.4 RegularizationWhat was the dropout rate used for the base mo...0.1PDROP = 0.1NaN
4343https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32NaN
4747https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?65NaN
5151https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
6565https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEDo you need a fish to conquer a distant island?YESNONaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "43 43 https://arxiv.org/pdf/2201.11903 \n", + "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", + "51 51 https://github.com/mozilla-ai/structured-qa/re... \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "65 65 https://github.com/mozilla-ai/structured-qa/re... \n", + "\n", + " section \\\n", + "10 5.4 Regularization \n", + "43 3.4 Robustness of Chain of Thought \n", + "47 CARD AND TILE EFFECTS \n", + "51 CHAPTER OVERVIEW \n", + "52 CARD AND TILE COSTS \n", + "65 EXPEDITION PHASE \n", + "\n", + " question answer pred_answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 PDROP = 0.1 \n", + "43 How many annotators provided independent chain... 3 2 \n", + "47 How many different races are there? 6 5 \n", + "51 After taking a landmark tile, do you reveal a ... NO YES \n", + "52 Can a player pay coins to compensate for missi... YES NO \n", + "65 Do you need a fish to conquer a distant island? 
YES NO \n", + "\n", + " pred_section \n", + "10 NaN \n", + "43 NaN \n", + "47 NaN \n", + "51 NaN \n", + "52 NaN \n", + "65 NaN " ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " if result[\"pred_answer\"].startswith(\n", + " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", + " ):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "wfz1XQDLlyha", + "outputId": "e2539d58-338a-4b59-87ae-ed66c480b015" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "6RoEbYj3XZri" - }, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_llama_cpp_model" + "data": { + "text/plain": [ + "0.9393939393939394" ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "rjMNQp8-sZn9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "0b95e6210f4f4788a718c652cd5593c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 153, - "referenced_widgets": [ - "00bbe62a72a64d7580010b8fdd1c9cd6", - "ab4af327ce664d9ba3240c66f33481cf", - "4f1c0bbaa5c5499a817a193bb7a4ecef", - "971414c1218e4de4a2973a4bf7943a81", - "6590c75ff1484ecd81d7db62b1b8e2ca", - "fd067a1b26c44ec4896bd2241fb7e3ce", - "ccbc263250264f31ab6398e96d7892d5", - "cd5b4abbbeb843e19f85387bc0522c33", - "a32d0ceeaf284c32aac7768df858e9e9", - "ff421a4ded21400a8e04c2c67fb5a03a", - "9b3a80c2cbc34259a8dc8b0b02c706ea" - ] - }, - "id": "ObsvwlNslyhZ", - "outputId": "6674c169-f578-4712-9c65-8480a8e3f8ed" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "00bbe62a72a64d7580010b8fdd1c9cd6", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Qwen2.5-7B-Instruct-Q8_0.gguf: 0%| | 0.00/8.10G [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:01.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:01.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:02.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:02.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:03.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:03.038\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:03.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:03.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: EI30\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:03.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:03.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:04.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:04.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:04.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:04.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. 
-B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:05.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:05.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:05.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:05.509\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:05.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? 
-A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:05.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:05.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:05.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:06.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:06.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:06.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. 
-C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:07.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:07.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:07.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:07.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:07.219\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:07.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:07.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:07.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:08.860\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:08.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:08.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:08.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:10.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Number\n", - "Question: How many parameters are in the toy model (y = x^2) tree?\n", - "Answer: 14\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:10.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:10.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:10.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:10.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:10.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:10.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:10.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:10.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:10.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:11.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:11.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:11.148\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:11.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:11.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:12.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Five\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:12.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:12.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:12.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:12.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:13.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 100\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:13.010\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:13.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:13.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:13.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:13.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:13.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:13.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:13.700\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:14.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", - "\u001b[32m2025-01-30 
10:01:14.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:14.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:14.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:14.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:14.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:14.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:14.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:14.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:15.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:15.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:15.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:16.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:16.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:16.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:16.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:19.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\n", - "Based on the information provided, the out-of-domain evaluation for symbolic reasoning tasks includes the coin flip task, as it mentions \"For last letter concatenation, the model only sees exemplars of names with two words, and then performs last letter concatenation on names with 3 and 4 words.4 We do the same for the number of potential flips in the coin flip task.\" This indicates that the coin flip task is used for out-of-domain evaluation.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:19.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:19.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:20.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:20.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:20.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:20.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:20.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:21.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1024\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:21.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:21.793\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: I need more info.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:21.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:21.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:22.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:22.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:22.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 4000\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:22.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:22.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:22.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: Pdrop = 0.1\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:22.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:22.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:23.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:23.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:23.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:23.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:23.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:23.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. 
-B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:24.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: It is cached for later use and to avoid recompilation.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:24.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:24.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:24.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:24.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:25.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:25.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:25.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In which type of parkings 
must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:25.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:25.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:25.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:25.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:25.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.525\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:26.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:27.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:27.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:27.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:27.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:27.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:28.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:28.012\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:28.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:28.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:28.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:28.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:28.229\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:28.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:28.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:28.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:29.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:29.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:29.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:29.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:29.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:29.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:30.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:30.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:30.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:30.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:30.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:30.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:32.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: Datasets ensuring quality and diversity\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:32.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:32.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:32.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:32.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", 
- "\u001b[32m2025-01-30 10:01:32.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 7\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:32.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:32.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:33.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B: 10 years\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:33.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:33.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:34.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:34.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Do you need a fish to 
conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:34.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:34.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:34.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:34.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:34.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:34.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:34.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:34.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:35.509\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:35.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:35.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:35.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:35.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:36.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:36.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:36.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: Player 1\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:36.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:36.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? 
-A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:37.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:37.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:37.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:37.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:37.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:38.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:38.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:38.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:38.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:38.876\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:38.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:38.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:38.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout 
phase?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:39.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 4\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:39.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:39.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:39.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:39.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:40.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:40.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:40.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:41.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:41.577\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:41.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:41.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:41.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:41.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 
3\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:41.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:41.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:44.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A: 7,500,000 EUR or 1% of annual turnover, whichever is higher.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:44.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:44.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:44.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:44.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:44.495\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:45.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:45.506\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:45.509\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:46.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:46.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:46.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:46.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:46.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:47.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:47.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:47.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:47.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:47.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? 
-A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:47.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:47.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:47.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:48.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:48.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:48.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:48.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:48.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:49.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:49.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:49.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:50.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:50.160\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:50.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:50.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m38\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:50.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-01-30 10:01:50.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mAnswer: 2015\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "\n", - "import pandas as pd\n", - "\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for section_name, section_data in data.groupby(\"section\"):\n", - " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", - "\n", - " answers, sections = process_section_questions(section_file, section_data, model)\n", - "\n", - " for index in section_data.index:\n", - " data.loc[index, 
\"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] + "87bfd1ae753e4586a139a988ae2c4601": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "a9eMkW1-lyha" - }, - "source": [ - "# Results" - ] + "8ca481ba99984fb2b9d87f02813c7408": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 332 - }, - "id": "EYYJgWf6lyha", - "outputId": "d909a39d-ca09-41de-dc7a-32903880211d" - }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 9,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 21,\n \"min\": 10,\n \"max\": 78,\n \"num_unique_values\": 9,\n \"samples\": [\n 65,\n 14,\n 51\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n 
\"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"EXPEDITION PHASE\",\n \"3 Experimental Results\",\n \"CHAPTER OVERVIEW\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"Do you need a fish to conquer a distant island?\",\n \"How many parameters are in the toy model (y = x^2) tree?\",\n \"After taking a landmark tile, do you reveal a new tile and the end of your turn?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"0.1\",\n \"14\",\n \"NO\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"NUMBER\\nQUESTION: HOW MANY PARAMETERS ARE IN THE TOY MODEL (Y = X^2) TREE?\\nANSWER: 14\",\n \"YES\",\n \"PDROP = 0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" - }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.037625.4 RegularizationWhat was the dropout rate used for the base mo...0.1PDROP = 0.1NaN
1414https://arxiv.org/pdf/2210.051893 Experimental ResultsHow many parameters are in the toy model (y = ...14NUMBER\\nQUESTION: HOW MANY PARAMETERS ARE IN T...NaN
3838https://arxiv.org/pdf/2201.119033.1 Experimental SetupHow many large language models were evaluated?5FIVENaN
4343https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32NaN
4747https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?65NaN
5151https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
6565https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEDo you need a fish to conquer a distant island?YESNONaN
7878https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...5.2. Thread HierarchyCan you identify a thread with a four-dimensio...NOI NEED MORE INFO.NaN
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " Unnamed: 0 document \\\n", - "10 10 https://arxiv.org/pdf/1706.03762 \n", - "14 14 https://arxiv.org/pdf/2210.05189 \n", - "38 38 https://arxiv.org/pdf/2201.11903 \n", - "43 43 https://arxiv.org/pdf/2201.11903 \n", - "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", - "51 51 https://github.com/mozilla-ai/structured-qa/re... \n", - "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", - "65 65 https://github.com/mozilla-ai/structured-qa/re... \n", - "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "\n", - " section \\\n", - "10 5.4 Regularization \n", - "14 3 Experimental Results \n", - "38 3.1 Experimental Setup \n", - "43 3.4 Robustness of Chain of Thought \n", - "47 CARD AND TILE EFFECTS \n", - "51 CHAPTER OVERVIEW \n", - "52 CARD AND TILE COSTS \n", - "65 EXPEDITION PHASE \n", - "78 5.2. Thread Hierarchy \n", - "\n", - " question answer \\\n", - "10 What was the dropout rate used for the base mo... 0.1 \n", - "14 How many parameters are in the toy model (y = ... 14 \n", - "38 How many large language models were evaluated? 5 \n", - "43 How many annotators provided independent chain... 3 \n", - "47 How many different races are there? 6 \n", - "51 After taking a landmark tile, do you reveal a ... NO \n", - "52 Can a player pay coins to compensate for missi... YES \n", - "65 Do you need a fish to conquer a distant island? YES \n", - "78 Can you identify a thread with a four-dimensio... NO \n", - "\n", - " pred_answer pred_section \n", - "10 PDROP = 0.1 NaN \n", - "14 NUMBER\\nQUESTION: HOW MANY PARAMETERS ARE IN T... NaN \n", - "38 FIVE NaN \n", - "43 2 NaN \n", - "47 5 NaN \n", - "51 YES NaN \n", - "52 NO NaN \n", - "65 NO NaN \n", - "78 I NEED MORE INFO. 
NaN " - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } + "970fb4579dab4f0e96f2efe052f0a463": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_397f1a99a7cf41599453f936b207a44a", + "IPY_MODEL_e80453d93a614c5885d0a8270db53e7a", + "IPY_MODEL_dc3fdea0c6cf45239f995b029219b664" ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "for index, result in results.iterrows():\n", - " if result[\"pred_answer\"].startswith((f\"-{result['answer']}\", f\"{result['answer']}\")):\n", - " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] + "layout": "IPY_MODEL_f3534030a918419fa1687b6407ff002e" + } }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "wfz1XQDLlyha", - "outputId": "fca09c21-3265-426d-b654-b045db8b1dae" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9090909090909091" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] + "c31f992a506241ea94e254189dc696f1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rjMNQp8-sZn9" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] + "dc3fdea0c6cf45239f995b029219b664": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c31f992a506241ea94e254189dc696f1", + "placeholder": "​", + "style": "IPY_MODEL_87bfd1ae753e4586a139a988ae2c4601", + "value": " 8.10G/8.10G [03:12<00:00, 41.5MB/s]" + } }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" + "de44370268a441c48c7b81fc0b138c22": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - "language_info": { - "name": "python", - "version": "3.10.12" + "e80453d93a614c5885d0a8270db53e7a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_de44370268a441c48c7b81fc0b138c22", + "max": 8098525888, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8ca481ba99984fb2b9d87f02813c7408", + "value": 8098525888 + } }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "00bbe62a72a64d7580010b8fdd1c9cd6": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ab4af327ce664d9ba3240c66f33481cf", - "IPY_MODEL_4f1c0bbaa5c5499a817a193bb7a4ecef", - "IPY_MODEL_971414c1218e4de4a2973a4bf7943a81" - ], - "layout": "IPY_MODEL_6590c75ff1484ecd81d7db62b1b8e2ca" - } - }, - "4f1c0bbaa5c5499a817a193bb7a4ecef": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cd5b4abbbeb843e19f85387bc0522c33", - "max": 8098525888, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_a32d0ceeaf284c32aac7768df858e9e9", - "value": 8098525888 - } - }, - "6590c75ff1484ecd81d7db62b1b8e2ca": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, 
- "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "971414c1218e4de4a2973a4bf7943a81": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ff421a4ded21400a8e04c2c67fb5a03a", - "placeholder": "​", - "style": "IPY_MODEL_9b3a80c2cbc34259a8dc8b0b02c706ea", - "value": " 8.10G/8.10G [06:24<00:00, 21.6MB/s]" - } - }, - "9b3a80c2cbc34259a8dc8b0b02c706ea": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a32d0ceeaf284c32aac7768df858e9e9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ab4af327ce664d9ba3240c66f33481cf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fd067a1b26c44ec4896bd2241fb7e3ce", - "placeholder": "​", - "style": "IPY_MODEL_ccbc263250264f31ab6398e96d7892d5", - "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" - } - }, - "ccbc263250264f31ab6398e96d7892d5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cd5b4abbbeb843e19f85387bc0522c33": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - 
"grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fd067a1b26c44ec4896bd2241fb7e3ce": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ff421a4ded21400a8e04c2c67fb5a03a": { - "model_module": "@jupyter-widgets/base", - 
"model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - } - } + "f3534030a918419fa1687b6407ff002e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": 
null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } } - }, - "nbformat": 4, - "nbformat_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 } From 136043794748c826d7a1641c262de6cba1ce78dc Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 30 Jan 2025 20:10:59 +0100 Subject: [PATCH 092/120] Cleanup --- benchmark/RAGatouille.ipynb | 321 ------------------------------------ benchmark/benchmark.ipynb | 185 --------------------- 2 files changed, 506 deletions(-) delete mode 100644 benchmark/RAGatouille.ipynb delete mode 100644 benchmark/benchmark.ipynb diff --git a/benchmark/RAGatouille.ipynb b/benchmark/RAGatouille.ipynb deleted file mode 100644 index 20489ed..0000000 --- a/benchmark/RAGatouille.ipynb +++ /dev/null @@ -1,321 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Structured Q&A" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## GPU Check" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, you'll need to enable GPUs for the notebook:\n", - "\n", - "- Navigate to `Edit`→`Notebook Settings`\n", - "- Select T4 GPU from the Hardware Accelerator section\n", - "- Click `Save` and accept.\n", - "\n", - "Next, we'll confirm that we can connect to the GPU:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": 
[ - "import torch\n", - "\n", - "if not torch.cuda.is_available():\n", - " raise RuntimeError(\"GPU not available\")\n", - "else:\n", - " print(\"GPU is available!\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install ragatouille" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Function to Download Document" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "from loguru import logger\n", - "\n", - "\n", - "def download_document(url, output_file):\n", - " if not Path(output_file).exists():\n", - " urlretrieve(url, output_file)\n", - " logger.info(f\"Downloaded {url} to {output_file}\")\n", - " else:\n", - " logger.info(f\"File {output_file} already exists\")" - ] - }, 
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Function to Process a single Document" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from ragatouille import RAGPretrainedModel\n", - "\n", - "\n", - "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", - "You are a rigorous assistant answering questions.\n", - "You only answer based on the current information available.\n", - "You should only answer with ANSWER_TYPE.\n", - "\n", - "The current information available is:\n", - "\n", - "{CURRENT_INFO}\n", - "\n", - "If the current information available not enough to answer the question,\n", - "you must return the following message and nothing else:\n", - "\n", - "```\n", - "I need more info.\n", - "```\n", - "\"\"\"\n", - "\n", - "\n", - "def process_document(\n", - " document_file,\n", - " document_data,\n", - " model,\n", - " answer_prompt=ANSWER_WITH_TYPE_PROMPT,\n", - "):\n", - " logger.info(\"Setting up RAG\")\n", - " RAG = RAGPretrainedModel.from_pretrained(\"colbert-ir/colbertv2.0\")\n", - " RAG.index([document_file])\n", - "\n", - " logger.info(\"Predicting\")\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in document_data.iterrows():\n", - " question = row[\"question\"]\n", - " try:\n", - " float(row[\"answer\"])\n", - " answer_type = \"a number\"\n", - " except ValueError:\n", - " if row[\"answer\"] in (\"YES\", \"NO\"):\n", - " answer_type = \"YES or NO\"\n", - " else:\n", - " answer_type = \"a single letter\"\n", - "\n", - " answer_prompt = answer_prompt.replace(\"ANSWER_TYPE\", answer_type)\n", - "\n", - " logger.info(f\"Question: {question}\")\n", - " logger.info(\"RAG search\")\n", - " results = RAG.search(query=question, k=3)\n", - "\n", - " current_info = \"\\n\".join(result[\"content\"] for result in results)\n", - " messages = [\n", - " {\n", - " \"role\": \"system\",\n", - " \"content\": answer_prompt.format(CURRENT_INFO=current_info),\n", - " },\n", - " 
{\"role\": \"user\", \"content\": question},\n", - " ]\n", - " answer = model.get_response(messages)\n", - "\n", - " answers[index] = answer\n", - " sections[index] = None\n", - "\n", - " return answers, sections" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_llama_cpp_model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = load_llama_cpp_model(\n", - " \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Run Benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for document_link, document_data in data.groupby(\"document\"):\n", - " logger.info(f\"Downloading document {document_link}\")\n", - " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " download_document(document_link, downloaded_document)\n", - "\n", - " answers, sections = process_document(downloaded_document, document_data, model)\n", - "\n", - " for index in document_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb deleted file mode 100644 index 244b765..0000000 --- a/benchmark/benchmark.ipynb +++ /dev/null @@ -1,185 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Structured Q&A" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## GPU Check" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, you'll need to enable GPUs for the notebook:\n", - "\n", - "- Navigate to `Edit`→`Notebook Settings`\n", - "- Select T4 GPU from the Hardware Accelerator section\n", - "- Click `Save` and accept.\n", - "\n", - "Next, we'll confirm that we can connect to the GPU:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "\n", - "if not torch.cuda.is_available():\n", - " raise RuntimeError(\"GPU not available\")\n", - "else:\n", - " print(\"GPU is available!\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install --quiet 
https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Find Retrieve Answer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_unsloth_model\n", - "from structured_qa.benchmark.run_benchmark import run_benchmark\n", - "from structured_qa.benchmark.find_retrieve_answer import fra_process_document" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run_benchmark(\n", - " input_data=\"structured_qa.csv\",\n", - " output_file=\"fra_results.csv\",\n", - " process_document=fra_process_document,\n", - " model=load_unsloth_model(\"unsloth/Meta-Llama-3.1-8B-Instruct\", \"llama-3.1\"),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 69069913d19935fa8b7c7cdccfe1b5f24fb49fc0 Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 30 Jan 2025 20:12:32 +0100 Subject: [PATCH 093/120] Cleanup --- src/structured_qa/benchmark/__init__.py | 0 .../benchmark/find_retrieve_answer.py | 65 -------------- src/structured_qa/benchmark/full_context.py | 90 ------------------- src/structured_qa/benchmark/gemini.py | 0 src/structured_qa/benchmark/run_benchmark.py | 44 --------- 5 files changed, 199 deletions(-) delete mode 100644 src/structured_qa/benchmark/__init__.py delete mode 100644 src/structured_qa/benchmark/find_retrieve_answer.py delete mode 100644 src/structured_qa/benchmark/full_context.py delete mode 100644 src/structured_qa/benchmark/gemini.py delete mode 100644 src/structured_qa/benchmark/run_benchmark.py diff --git a/src/structured_qa/benchmark/__init__.py b/src/structured_qa/benchmark/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/structured_qa/benchmark/find_retrieve_answer.py b/src/structured_qa/benchmark/find_retrieve_answer.py deleted file mode 100644 index 075b149..0000000 --- a/src/structured_qa/benchmark/find_retrieve_answer.py +++ /dev/null @@ -1,65 +0,0 @@ -from pathlib import Path -from loguru import logger - -from structured_qa.config import FIND_PROMPT -from structured_qa.preprocessing import document_to_sections_dir -from structured_qa.workflow import find_retrieve_answer - - -ANSWER_WITH_TYPE_PROMPT = """ -You are a rigorous assistant answering questions. -You only answer based on the current information available. 
-You should only answer with ANSWER_TYPE. - -The current information available is: - -``` -{CURRENT_INFO} -``` - -If the current information available not enough to answer the question, -you must return the following message and nothing else: - -``` -I need more info. -``` -""" - - -def fra_process_document( - document_file, - document_data, - model, - find_prompt: str = FIND_PROMPT, - answer_prompt: str = ANSWER_WITH_TYPE_PROMPT, -): - sections_dir = Path("sections") / Path(document_file).stem - if not sections_dir.exists(): - logger.info("Splitting document into sections") - document_to_sections_dir(document_file, sections_dir) - - logger.info("Predicting") - answers = {} - sections = {} - for index, row in document_data.iterrows(): - question = row["question"] - try: - float(row["answer"]) - answer_type = "a number" - except ValueError: - if row["answer"] in ("YES", "NO"): - answer_type = "YES or NO" - else: - answer_type = "a single letter" - - answer_prompt = answer_prompt.replace("ANSWER_TYPE", answer_type) - - logger.info(f"Question: {question}") - answer, sections_checked = find_retrieve_answer( - question, model, sections_dir, find_prompt, answer_prompt - ) - - answers[index] = answer - sections[index] = sections_checked[-1] if sections_checked else None - - return answers, sections diff --git a/src/structured_qa/benchmark/full_context.py b/src/structured_qa/benchmark/full_context.py deleted file mode 100644 index b3bcc2b..0000000 --- a/src/structured_qa/benchmark/full_context.py +++ /dev/null @@ -1,90 +0,0 @@ -import re - -import PyPDF2 -from loguru import logger - - -ANSWER_WITH_TYPE_PROMPT = """ -You are a rigorous assistant answering questions. -You only answer based on the current information available. -You should only answer with ANSWER_TYPE. 
- -The current information available is: - -``` -{CURRENT_INFO} -``` -""" - - -def load_pdf(pdf_file: str) -> str | None: - try: - pdf_reader = PyPDF2.PdfReader(pdf_file) - return "\n".join(page.extract_text() for page in pdf_reader.pages) - except Exception as e: - logger.exception(e) - return None - - -def clean_with_regex(text: str) -> str: - text = re.sub( - r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", - "", - text, - ) - text = re.sub(r"[\w\.-]+@[\w\.-]+\.[\w]+", "", text) - text = re.sub(r'[^a-zA-Z0-9\s.,!?;:"\']', "", text) - text = re.sub(r"\s+", " ", text).strip() - return text - - -def full_context_process_document( - document_file, - document_data, - model, - answer_prompt: str = ANSWER_WITH_TYPE_PROMPT, -): - document = clean_with_regex(load_pdf(document_file)) - - logger.info(f"Length of the document: {len(document)}") - max_characters = model.model.n_ctx() * 4 - if len(document) > max_characters: - logger.warning( - f"Input text is too big ({len(document)})." - f" Using only a subset of it ({max_characters})." 
- ) - document = document[:max_characters] - - logger.info("Predicting") - answers = {} - sections = {} - for index, row in document_data.iterrows(): - question = row["question"] - try: - float(row["answer"]) - answer_type = "a number" - except ValueError: - if row["answer"] in ("YES", "NO"): - answer_type = "YES or NO" - else: - answer_type = "a single letter" - - answer_prompt = answer_prompt.replace("ANSWER_TYPE", answer_type) - - logger.info(f"Question: {question}") - messages = [ - { - "role": "system", - "content": answer_prompt.format(CURRENT_INFO="\n".join(document)), - }, - {"role": "user", "content": question}, - ] - try: - response = model.get_response(messages) - except Exception as e: - logger.error(f"Failed to generate completion: {e}") - response = "Generation Error" - answers[index] = response - sections[index] = None - - return answers, sections diff --git a/src/structured_qa/benchmark/gemini.py b/src/structured_qa/benchmark/gemini.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/structured_qa/benchmark/run_benchmark.py b/src/structured_qa/benchmark/run_benchmark.py deleted file mode 100644 index c8d4d25..0000000 --- a/src/structured_qa/benchmark/run_benchmark.py +++ /dev/null @@ -1,44 +0,0 @@ -from pathlib import Path -from urllib.request import urlretrieve -from typing import Callable - -import pandas as pd -from fire import Fire -from loguru import logger - - -def download_document(url, output_file): - if not Path(output_file).exists(): - urlretrieve(url, output_file) - logger.debug(f"Downloaded {url} to {output_file}") - else: - logger.debug(f"File {output_file} already exists") - - -@logger.catch(reraise=True) -def run_benchmark( - input_data: str, output_file: str, process_document: Callable, **kwargs -): - logger.info("Loading input data") - data = pd.read_csv(input_data) - data["pred_answer"] = [None] * len(data) - data["pred_section"] = [None] * len(data) - - for document_link, document_data in data.groupby("document"): 
- logger.info(f"Downloading document {document_link}") - downloaded_document = Path(f"{Path(document_link).name}.pdf") - download_document(document_link, downloaded_document) - - answers, sections = process_document( - downloaded_document, document_data, **kwargs - ) - - for index in document_data.index: - data.loc[index, "pred_answer"] = str(answers[index]).upper() - data.loc[index, "pred_section"] = sections[index] - - data.to_csv(output_file) - - -if __name__ == "__main__": - Fire(run_benchmark) From 8abcfb1dd34817f5b43ec0033fe071c69057fede Mon Sep 17 00:00:00 2001 From: daavoo Date: Fri, 31 Jan 2025 13:51:03 +0100 Subject: [PATCH 094/120] Add DeepSeek-R1-Distill-Qwen-7B --- ...k_R1_Distill_Qwen_7B_perfect_context.ipynb | 1032 +++++++++++++++++ 1 file changed, 1032 insertions(+) create mode 100644 benchmark/DeepSeek_R1_Distill_Qwen_7B_perfect_context.ipynb diff --git a/benchmark/DeepSeek_R1_Distill_Qwen_7B_perfect_context.ipynb b/benchmark/DeepSeek_R1_Distill_Qwen_7B_perfect_context.ipynb new file mode 100644 index 0000000..8dc67f1 --- /dev/null +++ b/benchmark/DeepSeek_R1_Distill_Qwen_7B_perfect_context.ipynb @@ -0,0 +1,1032 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oD2lVadPlyhR" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JZeb4ABvlyhS" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77aI1i7vlyhS" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "atlPXFshlyhS" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QrgOGtuGlyhT", + "outputId": "a3f2857f-b0ea-4e33-cd92-38fec3ca87b9" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + 
"text": [ + "fatal: destination path 'structured-qa' already exists and is not an empty directory.\n" + ] + } + ], + "source": [ + "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S22kTrfPlyhU", + "outputId": "b8ec18d2-0da1-49bc-98aa-b73869c5c99c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Processing ./structured-qa\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev93+g6906991) (0.7.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev93+g6906991) (0.27.1)\n", + "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev93+g6906991) (0.7.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev93+g6906991) (2.10.6)\n", + "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev93+g6906991) (0.0.17)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev93+g6906991) (6.0.2)\n", + "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev93+g6906991) (3.12.1)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev93+g6906991) (1.41.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages 
(from fire->structured-qa==0.3.3.dev93+g6906991) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev93+g6906991) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev93+g6906991) (2024.9.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev93+g6906991) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev93+g6906991) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev93+g6906991) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev93+g6906991) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev93+g6906991) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev93+g6906991) (2.27.2)\n", + "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev93+g6906991) (1.25.2)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev93+g6906991) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (3.20.3)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (0.10.2)\n", + "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (6.0.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (3.1.44)\n", + "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (0.9.1)\n", 
+ "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev93+g6906991) (6.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (1.24.1)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev93+g6906991) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev93+g6906991) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev93+g6906991) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev93+g6906991) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in 
/usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev93+g6906991) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev93+g6906991) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (25.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from 
python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev93+g6906991) (1.17.0)\n", + "Building wheels for collected packages: structured-qa\n", + " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev93+g6906991-py3-none-any.whl size=13072 sha256=5d3ec90fa03ce1a4f1fb52f1d1c79ef1dcf475d443839e22372e2d9779e34f54\n", + " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", + "Successfully built structured-qa\n", + "Installing collected packages: structured-qa\n", + " Attempting uninstall: structured-qa\n", + " Found existing installation: structured-qa 0.3.3.dev93+g6906991\n", + " Uninstalling structured-qa-0.3.3.dev93+g6906991:\n", + " Successfully uninstalled structured-qa-0.3.3.dev93+g6906991\n", + "Successfully installed structured-qa-0.3.3.dev93+g6906991\n" + ] + } + ], + "source": [ + "%pip install ./structured-qa" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mZtwFXA5IOvn", + "outputId": "99c319c7-123b-4559-cf2d-3726f6dab3e0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ], + "source": [ + "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + 
"cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "jWlaKC5qXZrh" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process all questions for a single Section" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You only answer based on the current information available.\n", + "The current information available is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "The answer must be ONLY one of the following strings and nothing else:\n", + "- YES/NO (for boolean questions)\n", + "Is the model an LLM?\n", + "YES\n", + "- Number (for numeric questions)\n", + "How many layers does the model have?\n", + "12\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", + "C\n", + "\"\"\"\n", + "\n", + "\n", + "def process_section_questions(\n", + " section_file,\n", + " section_data,\n", + " model,\n", + "):\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in section_data.iterrows():\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " messages = [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": ANSWER_WITH_TYPE_PROMPT.format(\n", + " CURRENT_INFO=section_file.read_text()\n", + " ),\n", + " },\n", + " {\"role\": \"user\", \"content\": question},\n", + " ]\n", + " response = model.get_response(messages)\n", + " logger.info(f\"Answer: {response}\")\n", + " answers[index] = response\n", + " sections[index] = None\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "source": [ + "%pip install --no-cache-dir --upgrade unsloth\n", + "%pip uninstall unsloth unsloth_zoo -y\n", + "%pip install --upgrade --no-cache-dir \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n", + "%pip install --upgrade --no-cache-dir \"git+https://github.com/unslothai/unsloth-zoo.git\"" + ], + "metadata": { + "id": "JdMzMWMQ4bBq", + "outputId": "11ba6d55-d6fe-4b35-fc3f-c9e9c6218dc9", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: unsloth in /usr/local/lib/python3.11/dist-packages (2025.1.8)\n", + "Found existing installation: unsloth 2025.1.8\n", + "Uninstalling unsloth-2025.1.8:\n", + " Successfully uninstalled unsloth-2025.1.8\n", + "Found existing installation: unsloth_zoo 2025.1.5\n", + "Uninstalling unsloth_zoo-2025.1.5:\n", + " Successfully uninstalled unsloth_zoo-2025.1.5\n", + "Collecting unsloth@ 
git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)\n", + " Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-_6n90mml/unsloth_f9c5530fd943413db7ff81d7b8e72107\n", + " Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-_6n90mml/unsloth_f9c5530fd943413db7ff81d7b8e72107\n", + " Resolved https://github.com/unslothai/unsloth.git to commit 038e6d4c8d40207a87297ab3aaf787c19b1006d1\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting unsloth_zoo>=2025.1.4 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)\n", + " Downloading unsloth_zoo-2025.1.5-py3-none-any.whl.metadata (16 kB)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (24.2)\n", + "Requirement already satisfied: tyro in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.9.13)\n", + "Requirement already satisfied: transformers!=4.47.0,>=4.46.1 in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.47.1)\n", + "Requirement already satisfied: datasets>=2.16.0 in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.2.0)\n", + "Requirement already satisfied: sentencepiece>=0.2.0 in 
/usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.2.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.67.1)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (5.9.5)\n", + "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.45.1)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.26.4)\n", + "Requirement already satisfied: protobuf<4.0.0 in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.20.3)\n", + "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.27.1)\n", + "Requirement already satisfied: hf_transfer in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.1.9)\n", + "Requirement already satisfied: bitsandbytes>=0.43.3 in /usr/local/lib/python3.11/dist-packages (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) 
(0.45.1)\n", + "Requirement already satisfied: torch~=2.0 in /usr/local/lib/python3.11/dist-packages (from bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.5.1+cu124)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.17.0)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (17.0.0)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.3.8)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.2.2)\n", + "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.32.3)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.5.0)\n", + "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) 
(0.70.16)\n", + "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2024.9.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.11.11)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (6.0.2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.12.2)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2024.11.6)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.21.0)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.5.2)\n", + "Requirement already satisfied: triton<3.2.0 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo>=2025.1.4->unsloth@ 
git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.1.0)\n", + "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo>=2025.1.4->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.2.1)\n", + "Requirement already satisfied: trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo>=2025.1.4->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.14.0)\n", + "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo>=2025.1.4->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.14.0)\n", + "Requirement already satisfied: cut_cross_entropy in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo>=2025.1.4->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (25.1.1)\n", + "Requirement already satisfied: pillow in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo>=2025.1.4->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (11.1.0)\n", + "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.16)\n", + "Requirement already satisfied: rich>=11.1.0 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (13.9.4)\n", + "Requirement already satisfied: shtab>=1.5.6 in 
/usr/local/lib/python3.11/dist-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.7.1)\n", + "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.4.1)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (25.1.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.2.1)\n", + 
"Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.18.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=11.1.0->tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=11.1.0->tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.18.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from 
torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.1.5)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (12.4.127)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (12.4.5.8)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from 
torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (11.2.1.3)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (10.3.5.147)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (11.6.1.9)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (12.3.1.170)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (12.4.127)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (12.4.127)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from 
torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.3.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2025.1)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets>=2.16.0->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.17.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch~=2.0->bitsandbytes>=0.43.3->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ 
git+https://github.com/unslothai/unsloth.git) (3.0.2)\n", + "Downloading unsloth_zoo-2025.1.5-py3-none-any.whl (80 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.2/80.2 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: unsloth\n", + " Building wheel for unsloth (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for unsloth: filename=unsloth-2025.1.8-py3-none-any.whl size=174982 sha256=6b530a689e67e68b3a3bd714cfa95b8c53404de864084883b48e9383b3497ec2\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-b6pn9uot/wheels/d1/17/05/850ab10c33284a4763b0595cd8ea9d01fce6e221cac24b3c01\n", + "Successfully built unsloth\n", + "Installing collected packages: unsloth, unsloth_zoo\n", + "Successfully installed unsloth-2025.1.8 unsloth_zoo-2025.1.5\n", + "Collecting git+https://github.com/unslothai/unsloth-zoo.git\n", + " Cloning https://github.com/unslothai/unsloth-zoo.git to /tmp/pip-req-build-bz3r3r3r\n", + " Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth-zoo.git /tmp/pip-req-build-bz3r3r3r\n", + " Resolved https://github.com/unslothai/unsloth-zoo.git to commit 1101ee09f9464e259163a12a4ed5735c06873769\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (2.5.1+cu124)\n", + "Requirement already satisfied: triton<3.2.0 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (3.1.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (24.2)\n", + "Requirement already satisfied: tyro in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (0.9.13)\n", + "Requirement already satisfied: transformers>=4.46.1 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (4.47.1)\n", + "Requirement already satisfied: datasets>=2.16.0 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (3.2.0)\n", + "Requirement already satisfied: sentencepiece>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (0.2.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (4.67.1)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (5.9.5)\n", + "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (0.45.1)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (1.26.4)\n", + "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (1.2.1)\n", + "Requirement already satisfied: trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (0.14.0)\n", + "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (0.14.0)\n", + "Requirement already satisfied: protobuf<4.0.0 in /usr/local/lib/python3.11/dist-packages 
(from unsloth_zoo==2025.1.5) (3.20.3)\n", + "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (0.27.1)\n", + "Requirement already satisfied: hf_transfer in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (0.1.9)\n", + "Requirement already satisfied: cut_cross_entropy in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (25.1.1)\n", + "Requirement already satisfied: pillow in /usr/local/lib/python3.11/dist-packages (from unsloth_zoo==2025.1.5) (11.1.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from accelerate>=0.34.1->unsloth_zoo==2025.1.5) (6.0.2)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from accelerate>=0.34.1->unsloth_zoo==2025.1.5) (0.5.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth_zoo==2025.1.5) (3.17.0)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth_zoo==2025.1.5) (17.0.0)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth_zoo==2025.1.5) (0.3.8)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth_zoo==2025.1.5) (2.2.2)\n", + "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth_zoo==2025.1.5) (2.32.3)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth_zoo==2025.1.5) (3.5.0)\n", + "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth_zoo==2025.1.5) (0.70.16)\n", + "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in 
/usr/local/lib/python3.11/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets>=2.16.0->unsloth_zoo==2025.1.5) (2024.9.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets>=2.16.0->unsloth_zoo==2025.1.5) (3.11.11)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub->unsloth_zoo==2025.1.5) (4.12.2)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (3.1.5)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (12.4.127)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (12.4.5.8)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (11.2.1.3)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (10.3.5.147)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (11.6.1.9)\n", + 
"Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (12.3.1.170)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (12.4.127)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (12.4.127)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch->unsloth_zoo==2025.1.5) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch->unsloth_zoo==2025.1.5) (1.3.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.46.1->unsloth_zoo==2025.1.5) (2024.11.6)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.46.1->unsloth_zoo==2025.1.5) (0.21.0)\n", + "Requirement already satisfied: rich in /usr/local/lib/python3.11/dist-packages (from trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth_zoo==2025.1.5) (13.9.4)\n", + "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth_zoo==2025.1.5) (0.16)\n", + "Requirement already satisfied: shtab>=1.5.6 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth_zoo==2025.1.5) (1.7.1)\n", + "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from tyro->unsloth_zoo==2025.1.5) (4.4.1)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from 
aiohttp->datasets>=2.16.0->unsloth_zoo==2025.1.5) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth_zoo==2025.1.5) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth_zoo==2025.1.5) (25.1.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth_zoo==2025.1.5) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth_zoo==2025.1.5) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth_zoo==2025.1.5) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets>=2.16.0->unsloth_zoo==2025.1.5) (1.18.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth_zoo==2025.1.5) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth_zoo==2025.1.5) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth_zoo==2025.1.5) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth_zoo==2025.1.5) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth_zoo==2025.1.5) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in 
/usr/local/lib/python3.11/dist-packages (from rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth_zoo==2025.1.5) (2.18.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch->unsloth_zoo==2025.1.5) (3.0.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets>=2.16.0->unsloth_zoo==2025.1.5) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets>=2.16.0->unsloth_zoo==2025.1.5) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets>=2.16.0->unsloth_zoo==2025.1.5) (2025.1)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth_zoo==2025.1.5) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets>=2.16.0->unsloth_zoo==2025.1.5) (1.17.0)\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "6RoEbYj3XZri" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_unsloth_model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 243, + "referenced_widgets": [ + "0ad923646c19424aa25044aaa542d48c", + "a87497755c5d4b9282e91caeb298b4b2", + "5039935aca92416b87ea35620eca8537", + "ffc3cb2979f44fbba11949ecc136a30d", + "041c12eac49f4002ba1bf6bcf4743204", + "3f7950ef7e684ff8bc16459c24a1e75e", + "5af16894ba99438293dff26869d77da1", + "ae352af60b784f79975f2a340601f5e6", + "7ba26300ad114d6296ab45f5629e4744", + "f173976fc81146e69ff3cd644be4b044", + "95b2052a2e0f47cd84dacb562703753d" + ] + }, + "id": "ObsvwlNslyhZ", + "outputId": 
"4a84d2fb-46f1-40ab-e8cb-1f4679918b29" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", + "🦥 Unsloth Zoo will now patch everything to make training faster!\n", + "==((====))== Unsloth 2025.1.8: Fast Qwen2 patching. Transformers: 4.47.1.\n", + " \\\\ /| GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.\n", + "O^O/ \\_/ \\ Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0\n", + "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]\n", + " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n", + "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00 to EOS = <|end▁of▁sentence|>.\n", + "You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. 
This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.\n" + ] + } + ], + "source": [ + "model = load_unsloth_model(\n", + " \"unsloth/DeepSeek-R1-Distill-Qwen-7B\", \"chatml\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W97jWzzOlyhZ" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AZBwRnfjlyhZ", + "outputId": "ed59db2a-7125-419b-bff5-33711ca02fce" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[32m2025-01-31 12:42:34.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-31 12:42:34.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-31 12:42:34.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. 
Please pass your input's `attention_mask` to obtain reliable results.\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for section_name, section_data in data.groupby(\"section\"):\n", + " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", + "\n", + " answers, sections = process_section_questions(section_file, section_data, model)\n", + "\n", + " for index in section_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EYYJgWf6lyha" + }, + "outputs": [], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " if result[\"pred_answer\"].startswith(\n", + " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", + " ):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wfz1XQDLlyha" + }, + "outputs": [], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rjMNQp8-sZn9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": 
"Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "0ad923646c19424aa25044aaa542d48c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a87497755c5d4b9282e91caeb298b4b2", + "IPY_MODEL_5039935aca92416b87ea35620eca8537", + "IPY_MODEL_ffc3cb2979f44fbba11949ecc136a30d" + ], + "layout": "IPY_MODEL_041c12eac49f4002ba1bf6bcf4743204" + } + }, + "a87497755c5d4b9282e91caeb298b4b2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3f7950ef7e684ff8bc16459c24a1e75e", + "placeholder": "​", + "style": "IPY_MODEL_5af16894ba99438293dff26869d77da1", + "value": "Loading checkpoint shards: 100%" + } + }, + "5039935aca92416b87ea35620eca8537": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": 
"success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ae352af60b784f79975f2a340601f5e6", + "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7ba26300ad114d6296ab45f5629e4744", + "value": 2 + } + }, + "ffc3cb2979f44fbba11949ecc136a30d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f173976fc81146e69ff3cd644be4b044", + "placeholder": "​", + "style": "IPY_MODEL_95b2052a2e0f47cd84dacb562703753d", + "value": " 2/2 [00:43<00:00, 21.33s/it]" + } + }, + "041c12eac49f4002ba1bf6bcf4743204": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + 
"order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3f7950ef7e684ff8bc16459c24a1e75e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5af16894ba99438293dff26869d77da1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ae352af60b784f79975f2a340601f5e6": { + "model_module": "@jupyter-widgets/base", + "model_name": 
"LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7ba26300ad114d6296ab45f5629e4744": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f173976fc81146e69ff3cd644be4b044": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "95b2052a2e0f47cd84dacb562703753d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 034fe2909dbb61fab6450b6dacdfb5fa3c6d24d8 Mon Sep 17 00:00:00 2001 From: daavoo Date: Sat, 1 Feb 2025 09:35:20 +0100 Subject: [PATCH 095/120] Debug current calls. 
Set to 9 before reset --- src/structured_qa/model_loaders.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/structured_qa/model_loaders.py b/src/structured_qa/model_loaders.py index 1a62837..ee8950b 100644 --- a/src/structured_qa/model_loaders.py +++ b/src/structured_qa/model_loaders.py @@ -93,8 +93,9 @@ def __init__(self, model): self.current_calls = 0 def get_response(self, messages): + logger.info(f"Current calls: {self.current_calls}") stacked_message = "\n".join(message["content"] for message in messages) - if self.current_calls >= 10: + if self.current_calls >= 9: logger.info("Waiting for 60 seconds") time.sleep(60) self.current_calls = 0 From a2d301f899d1ed4cac0af31ea93337629f2c3e48 Mon Sep 17 00:00:00 2001 From: daavoo Date: Sat, 1 Feb 2025 18:11:30 +0100 Subject: [PATCH 096/120] Add qwen find retrieve answer --- .../qwen_2_5_7B_find_retrieve_answer.ipynb | 1267 +++++++++++++++++ 1 file changed, 1267 insertions(+) create mode 100644 benchmark/qwen_2_5_7B_find_retrieve_answer.ipynb diff --git a/benchmark/qwen_2_5_7B_find_retrieve_answer.ipynb b/benchmark/qwen_2_5_7B_find_retrieve_answer.ipynb new file mode 100644 index 0000000..edeca37 --- /dev/null +++ b/benchmark/qwen_2_5_7B_find_retrieve_answer.ipynb @@ -0,0 +1,1267 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "9RKWbX7BHEgr" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PYuloevCHEgu" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pgYAsUQWHEgv" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EbFAX4heHEgv" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2HoyF-xbHEgv", + 
"outputId": "c67c89e2-0140-432a-c741-a2546685176b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-zpym8juf\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-zpym8juf\n", + " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", + " Switched to a new branch '5-add-benchmark'\n", + " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 7b9c96cd5fc3cd34781aa26e2519a6f4731feedc\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.7.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.27.1)\n", + "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.7.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (2.10.6)\n", + "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.0.17)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (6.0.2)\n", + "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (3.11.0)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (1.41.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev84+g7b9c96c) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2.32.3)\n", + "Requirement already 
satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev84+g7b9c96c) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev84+g7b9c96c) (2.27.2)\n", + "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev84+g7b9c96c) (1.25.2)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.10.2)\n", + "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (6.0.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.1.44)\n", + "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.9.1)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (6.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.23.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in 
/usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.0.2)\n", + "Requirement already satisfied: 
MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (24.3.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.36.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.17.0)\n" + ] + } + ], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p_hsSGafHEgw", + "outputId": "b78ec720-0315-48fe-9d15-dc3400ca69d7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2025-01-29 13:54:23-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
185.199.108.133, 185.199.111.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 21734 (21K) [text/plain]\n", + "Saving to: ‘structured_qa.csv.3’\n", + "\n", + "\rstructured_qa.csv.3 0%[ ] 0 --.-KB/s \rstructured_qa.csv.3 100%[===================>] 21.22K --.-KB/s in 0.002s \n", + "\n", + "2025-01-29 13:54:24 (9.19 MB/s) - ‘structured_qa.csv.3’ saved [21734/21734]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0MDfM6cyHEgx" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "5bLJE4U7HEgx" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "y3yUsRDWHEgy" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AgpODLeJHEgy" + }, + "source": [ + "## Function to Process a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "n6d8F7cYHEgy" + }, + "outputs": [], + "source": [ + "from structured_qa.config import FIND_PROMPT\n", + "from structured_qa.preprocessing import document_to_sections_dir\n", + "from structured_qa.workflow import find_retrieve_answer\n", + "\n", + "\n", + "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You only answer based on the current information available.\n", + "The current information available is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "The answer must be in one of the following formats:\n", + "- YES/NO (for boolean 
questions)\n", + "Is the model an LLM?\n", + "YES\n", + "- Number (for numeric questions)\n", + "How many layers does the model have?\n", + "12\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model? -A: ReLU -B: Sigmoid -C: Tanh\n", + "C\n", + "\"\"\"\n", + "\n", + "\n", + "def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + " find_prompt: str = FIND_PROMPT,\n", + " answer_prompt: str = ANSWER_WITH_TYPE_PROMPT,\n", + "):\n", + " sections_dir = Path(\"sections\") / Path(document_file).stem\n", + " if not sections_dir.exists():\n", + " logger.info(\"Splitting document into sections\")\n", + " document_to_sections_dir(document_file, sections_dir)\n", + "\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " answer, sections_checked = find_retrieve_answer(\n", + " question, model, sections_dir, find_prompt, answer_prompt\n", + " )\n", + " logger.info(f\"Answer: {answer}\")\n", + " answers[index] = answer\n", + " sections[index] = sections_checked[-1] if sections_checked else None\n", + "\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GdlWjANdHEgz" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "9zx8nCaZHEgz" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_llama_cpp_model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "U4R84hHRHEgz" + }, + "outputs": [], + "source": [ + "model = load_llama_cpp_model(\n", + " \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BEzqJJ1yHEgz" + }, + "source": [ + "# Run Benchmark" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "-qtPf9RmHEgz", + "outputId": "bc1ab1b7-e8d2-4fb0-981b-883e7eeaa794" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-01-29 13:54:26.672\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:26.682\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:26.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:26.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:26.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:27.691\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 988.13ms\n", + "\u001b[32m2025-01-29 13:54:27.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:27.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 540.84ms\n", + "\u001b[32m2025-01-29 13:54:28.263\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:28.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:28.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. 
-C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:28.870\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 572.93ms\n", + "\u001b[32m2025-01-29 13:54:28.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:28.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:29.418\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 521.13ms\n", + "\u001b[32m2025-01-29 13:54:29.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:29.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 653.13ms\n", + "\u001b[32m2025-01-29 13:54:30.095\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:30.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:30.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? 
-A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:30.948\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 829.04ms\n", + "\u001b[32m2025-01-29 13:54:30.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:30.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:31.576\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 615.85ms\n", + "\u001b[32m2025-01-29 13:54:31.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:31.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:32.208\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 613.00ms\n", + "\u001b[32m2025-01-29 13:54:32.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:32.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:32.858\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 626.48ms\n", + "\u001b[32m2025-01-29 13:54:32.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:32.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:33.431\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 554.99ms\n", + "\u001b[32m2025-01-29 13:54:33.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:33.466\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:33.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 1706.03762.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:33.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:33.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:34.056\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 568.57ms\n", + "\u001b[32m2025-01-29 13:54:34.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:34.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 689.37ms\n", + "\u001b[32m2025-01-29 13:54:34.770\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:34.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:34.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:35.397\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 613.50ms\n", + "\u001b[32m2025-01-29 13:54:35.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:35.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 582.09ms\n", + "\u001b[32m2025-01-29 13:54:35.992\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST 
https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:35.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:35.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.49ms\n", + "\u001b[32m2025-01-29 13:54:36.611\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:36.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:36.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:37.234\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 610.46ms\n", + "\u001b[32m2025-01-29 13:54:37.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:37.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.15ms\n", + "\u001b[32m2025-01-29 13:54:37.831\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST 
https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:37.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:37.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 609.22ms\n", + "\u001b[32m2025-01-29 13:54:38.452\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:38.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:38.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.94ms\n", + "\u001b[32m2025-01-29 13:54:39.100\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:39.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:39.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 661.72ms\n", + "\u001b[32m2025-01-29 13:54:39.770\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:39.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:39.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:40.339\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 558.40ms\n", + "\u001b[32m2025-01-29 13:54:40.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:40.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:40.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2106.09685.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:40.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:40.359\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:41.023\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.57ms\n", + "\u001b[32m2025-01-29 13:54:41.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:41.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:41.666\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 635.63ms\n", + "\u001b[32m2025-01-29 13:54:41.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:41.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.74ms\n", + "\u001b[32m2025-01-29 13:54:42.310\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.904\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 584.31ms\n", + "\u001b[32m2025-01-29 13:54:42.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2201.11903.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:42.920\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.44ms\n", + "\u001b[32m2025-01-29 13:54:43.588\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:43.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:43.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 481.89ms\n", + "\u001b[32m2025-01-29 13:54:44.083\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:44.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:44.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 660.08ms\n", + "\u001b[32m2025-01-29 13:54:44.754\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:44.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:44.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 557.92ms\n", + "\u001b[32m2025-01-29 13:54:45.329\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:45.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:45.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 636.75ms\n", + "\u001b[32m2025-01-29 13:54:45.976\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:45.978\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:45.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 608.68ms\n", + "\u001b[32m2025-01-29 13:54:46.600\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:46.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:46.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.296\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 686.39ms\n", + "\u001b[32m2025-01-29 13:54:47.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.93ms\n", + "\u001b[32m2025-01-29 13:54:47.915\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST 
https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2210.05189.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:47.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 635.76ms\n", + "\u001b[32m2025-01-29 13:54:48.574\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:48.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:48.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 582.96ms\n", + "\u001b[32m2025-01-29 13:54:49.168\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:49.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:49.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:49.864\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 685.88ms\n", + "\u001b[32m2025-01-29 13:54:49.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:49.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 559.12ms\n", + "\u001b[32m2025-01-29 13:54:50.436\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:50.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:50.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.69ms\n", + "\u001b[32m2025-01-29 13:54:51.056\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 608.10ms\n", + "\u001b[32m2025-01-29 13:54:51.676\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:51.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:52.359\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.73ms\n", + "\u001b[32m2025-01-29 13:54:52.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:52.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:52.956\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:52.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:52.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 588.01ms\n", + "\u001b[32m2025-01-29 13:54:53.555\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:53.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 585.27ms\n", + "\u001b[32m2025-01-29 13:54:53.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:54.227\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.84ms\n", + "\u001b[32m2025-01-29 13:54:54.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:54.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 633.02ms\n", + "\u001b[32m2025-01-29 13:54:54.872\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:54.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:54.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 557.19ms\n", + "\u001b[32m2025-01-29 13:54:55.438\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:55.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:55.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:55.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:55.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:55.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 633.98ms\n", + "\u001b[32m2025-01-29 13:54:56.098\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:56.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:56.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 584.96ms\n", + "\u001b[32m2025-01-29 13:54:56.697\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:56.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:56.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 710.18ms\n", + "\u001b[32m2025-01-29 13:54:57.422\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:57.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:57.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.42ms\n", + "\u001b[32m2025-01-29 13:54:58.019\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile CUDA_C_Programming_Guide.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.028\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.759\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 715.90ms\n", + "\u001b[32m2025-01-29 13:54:58.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:58.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 736.45ms\n", + "\u001b[32m2025-01-29 13:54:59.509\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:59.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:54:59.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 684.31ms\n", + "\u001b[32m2025-01-29 13:55:00.210\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:00.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:00.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.52ms\n", + "\u001b[32m2025-01-29 13:55:00.815\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:00.816\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:00.818\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:01.414\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:01.415\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 585.33ms\n", + "\u001b[32m2025-01-29 13:55:01.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. 
-C: It is deleted after use to save space.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.50ms\n", + "\u001b[32m2025-01-29 13:55:02.012\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:02.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:02.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:02.690\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:02.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 660.22ms\n", + "\u001b[32m2025-01-29 13:55:02.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 710.62ms\n", + "\u001b[32m2025-01-29 13:55:03.415\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:03.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:03.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. 
-C: Only when the graph is destroyed.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 710.13ms\n", + "\u001b[32m2025-01-29 13:55:04.140\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:04.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:04.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:04.819\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:04.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:04.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 673.64ms\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.60ms\n", + "\u001b[32m2025-01-29 13:55:05.500\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:05.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:05.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 684.66ms\n", + "\u001b[32m2025-01-29 13:55:06.206\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile ?uri=OJ:L_202401689.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.893\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 664.22ms\n", + "\u001b[32m2025-01-29 13:55:06.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:06.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 608.95ms\n", + "\u001b[32m2025-01-29 13:55:07.517\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:07.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:07.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. 
-C: That there is no way to detect that the content is synthetic.\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.62ms\n", + "\u001b[32m2025-01-29 13:55:08.136\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:08.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:08.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 835.55ms\n", + "\u001b[32m2025-01-29 13:55:08.983\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:08.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:08.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:09.730\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 738.10ms\n", + "\u001b[32m2025-01-29 13:55:09.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:09.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. 
-B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:10.352\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 610.85ms\n", + "\u001b[32m2025-01-29 13:55:10.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:10.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 809.95ms\n", + "\u001b[32m2025-01-29 13:55:11.175\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:11.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:11.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? -A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.79ms\n", + "\u001b[32m2025-01-29 13:55:11.794\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:11.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:11.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? 
-A: 5 years -B: 10 years C: 15 years\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:12.465\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:12.467\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.49ms\n", + "\u001b[32m2025-01-29 13:55:12.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? -A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 835.45ms\n", + "\u001b[32m2025-01-29 13:55:13.310\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 7DUME_EN01_Rules.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.36ms\n", + "\u001b[32m2025-01-29 13:55:13.993\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:13.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.31ms\n", + "\u001b[32m2025-01-29 13:55:14.714\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:14.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:14.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 633.73ms\n", + "\u001b[32m2025-01-29 13:55:15.360\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:15.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:15.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:16.033\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.55ms\n", + "\u001b[32m2025-01-29 13:55:16.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:16.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:16.680\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:16.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:16.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 643.44ms\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 661.16ms\n", + "\u001b[32m2025-01-29 13:55:17.353\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:17.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:17.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 683.50ms\n", + "\u001b[32m2025-01-29 13:55:18.050\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:18.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:18.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.33ms\n", + "\u001b[32m2025-01-29 13:55:18.721\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:18.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:18.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 609.49ms\n", + "\u001b[32m2025-01-29 13:55:19.345\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:19.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:19.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.21ms\n", + "\u001b[32m2025-01-29 13:55:20.068\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:20.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:20.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 684.96ms\n", + "\u001b[32m2025-01-29 13:55:20.764\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:20.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:20.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.86ms\n", + "\u001b[32m2025-01-29 13:55:21.409\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:21.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:21.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.19ms\n", + "\u001b[32m2025-01-29 13:55:22.081\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:22.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:22.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.59ms\n", + "\u001b[32m2025-01-29 13:55:22.701\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:22.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:22.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.04ms\n", + "\u001b[32m2025-01-29 13:55:23.422\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:23.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:23.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:23.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile is_eotn_rulebook.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:23.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:23.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.39ms\n", + "\u001b[32m2025-01-29 13:55:24.159\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:24.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:24.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 614.42ms\n", + "\u001b[32m2025-01-29 13:55:24.783\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:24.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:24.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.58ms\n", + "\u001b[32m2025-01-29 13:55:25.454\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:25.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:25.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 860.42ms\n", + "\u001b[32m2025-01-29 13:55:26.327\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:26.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:26.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 683.94ms\n", + "\u001b[32m2025-01-29 13:55:27.024\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:27.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:27.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:27.696\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:27.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:27.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 669.67ms\n", + "\u001b[32m2025-01-29 13:55:28.420\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:28.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:28.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 721.91ms\n", + "\u001b[32m2025-01-29 13:55:29.116\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:29.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:29.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 687.25ms\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 683.55ms\n", + "\u001b[32m2025-01-29 13:55:29.813\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:29.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:29.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.97ms\n", + "\u001b[32m2025-01-29 13:55:30.434\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:30.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:30.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.87ms\n", + "\u001b[32m2025-01-29 13:55:31.077\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:31.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:31.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:31.734\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 644.82ms\n", + "\u001b[32m2025-01-29 13:55:31.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:31.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-01-29 13:55:32.540\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", + "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 763.87ms\n", + "\u001b[32m2025-01-29 13:55:32.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "3eW9TIKjHEgz", + "outputId": "186ae159-38de-4104-ca6d-c29d1172503f" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 99,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n 
\"std\": 28,\n \"min\": 0,\n \"max\": 98,\n \"num_unique_values\": 99,\n \"samples\": [\n 62,\n 40,\n 95\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 59,\n \"samples\": [\n \"3 Model Architecture\",\n \"5.2 Hardware and Schedule\",\n \"CARD AND TILE EFFECTS\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 98,\n \"samples\": [\n \"Can you raid the locations of a player that has passed during the action phase?\",\n \"Is symbolic reasoning usually simple for humans but challenging for language models?\",\n \"How many AI-related regulations were enacted in the United States in 2023?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 21,\n \"samples\": [\n \"C\",\n \"4\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"GENERATION ERROR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
00https://arxiv.org/pdf/1706.037623 Model ArchitectureWhat type of architecture does the model use? ...CGENERATION ERRORNaN
11https://arxiv.org/pdf/1706.037623.1 Encoder and Decoder StacksHow many layers compose the encoder?6GENERATION ERRORNaN
22https://arxiv.org/pdf/1706.037623.1 Encoder and Decoder StacksHow many layers compose the decoder?6GENERATION ERRORNaN
33https://arxiv.org/pdf/1706.037623.2.2 Multi-Head AttentionHow many parallel attention heads are used?8GENERATION ERRORNaN
44https://arxiv.org/pdf/1706.037623.4 Embeddings and SoftmaxDoes the final model use learned embeddings fo...YESGENERATION ERRORNaN
........................
9494https://aiindex.stanford.edu/wp-content/upload...LLM Tokenization Introduces UnfairnessWhat are the three major inequalities resultin...BGENERATION ERRORNaN
9595https://aiindex.stanford.edu/wp-content/upload...U.S. RegulationHow many AI-related regulations were enacted i...25GENERATION ERRORNaN
9696https://aiindex.stanford.edu/wp-content/upload...U.S. RegulationWhich of the following was identified as a hig...BGENERATION ERRORNaN
9797https://aiindex.stanford.edu/wp-content/upload...EuropeWhich country had the highest proportion of fe...BGENERATION ERRORNaN
9898https://aiindex.stanford.edu/wp-content/upload...EuropeWhich countries reported the smallest proporti...CGENERATION ERRORNaN
\n", + "

99 rows × 7 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Unnamed: 0 document \\\n", + "0 0 https://arxiv.org/pdf/1706.03762 \n", + "1 1 https://arxiv.org/pdf/1706.03762 \n", + "2 2 https://arxiv.org/pdf/1706.03762 \n", + "3 3 https://arxiv.org/pdf/1706.03762 \n", + "4 4 https://arxiv.org/pdf/1706.03762 \n", + ".. ... ... \n", + "94 94 https://aiindex.stanford.edu/wp-content/upload... \n", + "95 95 https://aiindex.stanford.edu/wp-content/upload... \n", + "96 96 https://aiindex.stanford.edu/wp-content/upload... \n", + "97 97 https://aiindex.stanford.edu/wp-content/upload... \n", + "98 98 https://aiindex.stanford.edu/wp-content/upload... \n", + "\n", + " section \\\n", + "0 3 Model Architecture \n", + "1 3.1 Encoder and Decoder Stacks \n", + "2 3.1 Encoder and Decoder Stacks \n", + "3 3.2.2 Multi-Head Attention \n", + "4 3.4 Embeddings and Softmax \n", + ".. ... \n", + "94 LLM Tokenization Introduces Unfairness \n", + "95 U.S. Regulation \n", + "96 U.S. Regulation \n", + "97 Europe \n", + "98 Europe \n", + "\n", + " question answer \\\n", + "0 What type of architecture does the model use? ... C \n", + "1 How many layers compose the encoder? 6 \n", + "2 How many layers compose the decoder? 6 \n", + "3 How many parallel attention heads are used? 8 \n", + "4 Does the final model use learned embeddings fo... YES \n", + ".. ... ... \n", + "94 What are the three major inequalities resultin... B \n", + "95 How many AI-related regulations were enacted i... 25 \n", + "96 Which of the following was identified as a hig... B \n", + "97 Which country had the highest proportion of fe... B \n", + "98 Which countries reported the smallest proporti... C \n", + "\n", + " pred_answer pred_section \n", + "0 GENERATION ERROR NaN \n", + "1 GENERATION ERROR NaN \n", + "2 GENERATION ERROR NaN \n", + "3 GENERATION ERROR NaN \n", + "4 GENERATION ERROR NaN \n", + ".. ... ... 
\n", + "94 GENERATION ERROR NaN \n", + "95 GENERATION ERROR NaN \n", + "96 GENERATION ERROR NaN \n", + "97 GENERATION ERROR NaN \n", + "98 GENERATION ERROR NaN \n", + "\n", + "[99 rows x 7 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AhenESELHEgz", + "outputId": "4b5d7785-4d17-4c78-b0f8-d69fa50bad15" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 8300573a6fee37f061120a588eb9b76ccb02cf4b Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 3 Feb 2025 11:37:12 +0100 Subject: [PATCH 097/120] Extend benchmark --- .../perfect_context/2.1 Pre-training Data | 100 ++++++++++++++++++ benchmark/perfect_context/2.3 Optimizer | 9 ++ benchmark/perfect_context/3 Main results.txt | 36 +++++++ .../5 Bias, Toxicity and Misinformation.txt | 16 +++ benchmark/perfect_context/5.2 CrowS-Pairs.txt | 32 ++++++ .../Accountability and responsibility.txt | 54 ++++++++++ ...l-purpose AI models with systemic risk.txt | 13 --- .../perfect_context/Codes of practice.txt | 39 ------- ...pliant AI systems which present a risk.txt | 20 ---- .../Data and data governance.txt | 49 --------- .../EU declaration of conformity.txt | 20 ---- ...European Artificial Intelligence Board.txt | 29 
----- .../Limitations of generative AI and LLMs.txt | 40 +++++++ benchmark/perfect_context/Penalties.txt | 57 ---------- .../Procurement in an emerging market.txt | 28 +++++ .../Reporting of serious incidents.txt | 40 ------- ...itions outside AI regulatory sandboxes.txt | 0 ...rs and deployers of certain AI systems.txt | 44 -------- benchmark/structured_qa.csv | 30 +++--- 19 files changed, 332 insertions(+), 324 deletions(-) create mode 100644 benchmark/perfect_context/2.1 Pre-training Data create mode 100644 benchmark/perfect_context/2.3 Optimizer create mode 100644 benchmark/perfect_context/3 Main results.txt create mode 100644 benchmark/perfect_context/5 Bias, Toxicity and Misinformation.txt create mode 100644 benchmark/perfect_context/5.2 CrowS-Pairs.txt create mode 100644 benchmark/perfect_context/Accountability and responsibility.txt delete mode 100644 benchmark/perfect_context/Classification of general-purpose AI models as general-purpose AI models with systemic risk.txt delete mode 100644 benchmark/perfect_context/Codes of practice.txt delete mode 100644 benchmark/perfect_context/Compliant AI systems which present a risk.txt delete mode 100644 benchmark/perfect_context/Data and data governance.txt delete mode 100644 benchmark/perfect_context/EU declaration of conformity.txt delete mode 100644 benchmark/perfect_context/Establishment and structure of the European Artificial Intelligence Board.txt create mode 100644 benchmark/perfect_context/Limitations of generative AI and LLMs.txt delete mode 100644 benchmark/perfect_context/Penalties.txt create mode 100644 benchmark/perfect_context/Procurement in an emerging market.txt delete mode 100644 benchmark/perfect_context/Reporting of serious incidents.txt delete mode 100644 benchmark/perfect_context/Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes.txt delete mode 100644 benchmark/perfect_context/Transparency obligations for providers and deployers of certain AI systems.txt 
diff --git a/benchmark/perfect_context/2.1 Pre-training Data b/benchmark/perfect_context/2.1 Pre-training Data new file mode 100644 index 0000000..47b96c3 --- /dev/null +++ b/benchmark/perfect_context/2.1 Pre-training Data @@ -0,0 +1,100 @@ +2 Approach +Our training approach is similar to the methods +described in previous work (Brown et al., 2020; +Chowdhery et al., 2022), and is inspired by the +Chinchilla scaling laws (Hoffmann et al., 2022). +We train large transformers on a large quantity of +textual data using a standard optimizer. +2.1 Pre-training Data +Our training dataset is a mixture of several sources, +reported in Table 1, that cover a diverse set of do- +mains. For the most part, we reuse data sources +that have been leveraged to train other LLMs, with +the restriction of only using data that is publicly +available, and compatible with open sourcing. This +leads to the following mixture of data and the per- +centage they represent in the training set: +English CommonCrawl [67%]. We preprocess +five CommonCrawl dumps, ranging from 2017 +to 2020, with the CCNet pipeline (Wenzek et al., +2020). This process deduplicates the data at the +line level, performs language identification with +a fastText linear classifier to remove non-English +pages and filters low quality content with an n- +gram language model. In addition, we trained a +linear model to classify pages used as references +in Wikipedia v.s. randomly sampled pages, and +discarded pages not classified as references. +C4 [15%]. During exploratory experiments, we +observed that using diverse pre-processed Com- +monCrawl datasets improves performance. We thus +included the publicly available C4 dataset (Raffel +et al., 2020) in our data. 
The preprocessing of C4 +also contains deduplication and language identifi- +cation steps: the main difference with CCNet is +the quality filtering, which mostly relies on heuris- +tics such as presence of punctuation marks or the +number of words and sentences in a webpage. +Github [4.5%]. We use the public GitHub +dataset available on Google BigQuery. We only +kept projects that are distributed under the Apache, +BSD and MIT licenses. Additionally, we filtered +low quality files with heuristics based on the line +length or proportion of alphanumeric characters, +and removed boilerplate, such as headers, with reg- +ular expressions. Finally, we deduplicate the result- +ing dataset at the file level, with exact matches. +Wikipedia [4.5%]. We add Wikipedia dumps +from the June-August 2022 period, covering 20 +Dataset Sampling prop. Epochs Disk size +CommonCrawl 67.0% 1.10 3.3 TB +C4 15.0% 1.06 783 GB +Github 4.5% 0.64 328 GB +Wikipedia 4.5% 2.45 83 GB +Books 4.5% 2.23 85 GB +ArXiv 2.5% 1.06 92 GB +StackExchange 2.0% 1.03 78 GB +Table 1: Pre-training data. Data mixtures used for pre- +training, for each subset we list the sampling propor- +tion, number of epochs performed on the subset when +training on 1.4T tokens, and disk size. The pre-training +runs on 1T tokens have the same sampling proportion. +languages, which use either the Latin or Cyrillic +scripts: bg, ca, cs, da, de, en, es, fr, hr, hu, it, +nl, pl, pt, ro, ru, sl, sr, sv, uk. We process the +data to remove hyperlinks, comments and other +formatting boilerplate. +Gutenberg and Books3 [4.5%]. We include +two book corpora in our training dataset: the Guten- +berg Project, which contains books that are in the +public domain, and the Books3 section of TheP- +ile (Gao et al., 2020), a publicly available dataset +for training large language models. We perform +deduplication at the book level, removing books +with more than 90% content overlap. +ArXiv [2.5%]. 
We process arXiv Latex files +to add scientific data to our dataset. Following +Lewkowycz et al. (2022), we removed everything +before the first section, as well as the bibliography. +We also removed the comments from the .tex files, +and inline-expanded definitions and macros written +by users to increase consistency across papers. +Stack Exchange [2%]. We include a dump of +Stack Exchange, a website of high quality ques- +tions and answers that covers a diverse set of do- +mains, ranging from computer science to chemistry. +We kept the data from the 28 largest websites, re- +moved the HTML tags from text and sorted the +answers by score (from highest to lowest). +Tokenizer. We tokenize the data with the byte- +pair encoding (BPE) algorithm (Sennrich et al., +2015), using the implementation from Sentence- +Piece (Kudo and Richardson, 2018). Notably, we +split all numbers into individual digits, and fallback +to bytes to decompose unknown UTF-8 characters. +Overall, our entire training dataset contains +roughly 1.4T tokens after tokenization. For most of +our training data, each token is used only once dur- +ing training, with the exception of the Wikipedia +and Books domains, over which we perform ap- +proximately two epochs. diff --git a/benchmark/perfect_context/2.3 Optimizer b/benchmark/perfect_context/2.3 Optimizer new file mode 100644 index 0000000..0a86d8c --- /dev/null +++ b/benchmark/perfect_context/2.3 Optimizer @@ -0,0 +1,9 @@ +Our models are trained using the AdamW opti- +mizer (Loshchilov and Hutter, 2017), with the fol- +lowing hyper-parameters: β1 = 0.9, β2 = 0.95. +We use a cosine learning rate schedule, such that +the final learning rate is equal to 10% of the maxi- +mal learning rate. We use a weight decay of 0.1 and +gradient clipping of 1.0. We use 2, 000 warmup +steps, and vary the learning rate and batch size with +the size of the model (see Table 2 for details). 
diff --git a/benchmark/perfect_context/3 Main results.txt b/benchmark/perfect_context/3 Main results.txt new file mode 100644 index 0000000..d7ea81e --- /dev/null +++ b/benchmark/perfect_context/3 Main results.txt @@ -0,0 +1,36 @@ +Following previous work (Brown et al., 2020), we +consider zero-shot and few-shot tasks, and report +results on a total of 20 benchmarks: +• Zero-shot. We provide a textual description +of the task and a test example. The model +either provides an answer using open-ended +generation, or ranks the proposed answers. +• Few-shot. We provide a few examples of the +task (between 1 and 64) and a test example. +The model takes this text as input and gener- +ates the answer or ranks different options. +We compare LLaMA with other foundation mod- +els, namely the non-publicly available language +models GPT-3 (Brown et al., 2020), Gopher (Rae +et al., 2021), Chinchilla (Hoffmann et al., 2022) +and PaLM (Chowdhery et al., 2022), as well as +the open-sourced OPT models (Zhang et al., 2022), +GPT-J (Wang and Komatsuzaki, 2021), and GPT- +Neo (Black et al., 2022). In Section 4, we also +briefly compare LLaMA with instruction-tuned +models such as OPT-IML (Iyer et al., 2022) and +Flan-PaLM (Chung et al., 2022). +We evaluate LLaMA on free-form generation +tasks and multiple choice tasks. In the multiple +choice tasks, the objective is to select the most +appropriate completion among a set of given op- +tions, based on a provided context. We select the +completion with the highest likelihood given the +provided context. We follow Gao et al. (2021) +and use the likelihood normalized by the number +of characters in the completion, except for certain +datasets (OpenBookQA, BoolQ), for which we fol- +low Brown et al. (2020), and select a completion +based on the likelihood normalized by the likeli- +hood of the completion given “Answer:” as context: +P (completion|context)/P (completion|“Answer:”). 
diff --git a/benchmark/perfect_context/5 Bias, Toxicity and Misinformation.txt b/benchmark/perfect_context/5 Bias, Toxicity and Misinformation.txt new file mode 100644 index 0000000..4f141ec --- /dev/null +++ b/benchmark/perfect_context/5 Bias, Toxicity and Misinformation.txt @@ -0,0 +1,16 @@ +Large language models have been showed to re- +produce and amplify biases that are existing in +the training data (Sheng et al., 2019; Kurita et al., +2019), and to generate toxic or offensive con- +tent (Gehman et al., 2020). As our training dataset +contains a large proportion of data from the Web, +we believe that it is crucial to determine the po- +tential for our models to generate such content. +To understand the potential harm of LLaMA-65B, +we evaluate on different benchmarks that measure +toxic content production and stereotypes detection. +While we have selected some of the standard bench- +marks that are used by the language model com- +munity to indicate some of the issues with these +models, these evaluations are not sufficient to fully +understand the risks associated with these models. diff --git a/benchmark/perfect_context/5.2 CrowS-Pairs.txt b/benchmark/perfect_context/5.2 CrowS-Pairs.txt new file mode 100644 index 0000000..fc7e72a --- /dev/null +++ b/benchmark/perfect_context/5.2 CrowS-Pairs.txt @@ -0,0 +1,32 @@ +LLaMA GPT3 OPT +Gender 70.6 62.6 65.7 +Religion 79.0 73.3 68.6 +Race/Color 57.0 64.7 68.6 +Sexual orientation 81.0 76.2 78.6 +Age 70.1 64.4 67.8 +Nationality 64.2 61.6 62.9 +Disability 66.7 76.7 76.7 +Physical appearance 77.8 74.6 76.2 +Socioeconomic status 71.5 73.8 76.2 +Average 66.6 67.2 69.5 +Table 12: CrowS-Pairs. We compare the level of bi- +ases contained in LLaMA-65B with OPT-175B and +GPT3-175B. Higher score indicates higher bias. +5.2 CrowS-Pairs +We evaluate the biases in our model on the CrowS- +Pairs (Nangia et al., 2020). 
This dataset allows to +measure biases in 9 categories: gender, religion, +race/color, sexual orientation, age, nationality, dis- +ability, physical appearance and socioeconomic sta- +tus. Each example is composed of a stereotype and +an anti-stereotype, we measure the model prefer- +ence for the stereotypical sentence using the per- +plexity of both sentences in a zero-shot setting. +Higher scores thus indicate higher bias. We com- +pare with GPT-3 and OPT-175B in Table 12. +LLaMA compares slightly favorably to both +models on average. Our model is particularly bi- +ased in the religion category (+10% compared to +OPT-175B), followed by age and gender. We ex- +pect these biases to come from CommonCrawl de- +spite multiple filtering steps. diff --git a/benchmark/perfect_context/Accountability and responsibility.txt b/benchmark/perfect_context/Accountability and responsibility.txt new file mode 100644 index 0000000..56ca248 --- /dev/null +++ b/benchmark/perfect_context/Accountability and responsibility.txt @@ -0,0 +1,54 @@ +Accountability and responsibility +Ensuring accountability for generative AI means that individuals and organisations can be +held accountable for the AI systems they develop, deploy, or use, and that human oversight +is maintained. To establish accountable practices across the AI lifecycle, you should +consider three key elements. +• Answerability: you should establish a chain of human responsibility across the generative +AI project lifecycle, including responsibility throughout the supply chain. In cases of +harm or errors caused by generative AI, recourse and feedback mechanisms need to be +established for affected individuals. Identifying the specific actors involved in generative AI +systems is vital to answerability. This includes model developers, application developers, +policymakers, regulators, system operators and end-users. The roles and responsibilities +of each must be clearly defined and aligned with legal and ethical standards. 
+• Auditability: you should demonstrate the responsibility and trustworthiness of +the development and deployment practices by upholding robust reporting and +documentation protocols, and retaining traceability throughout the AI lifecycle. This refers +to the process by which all stages of the generative AI innovation lifecycle from data +collection and base model training to implementation, fine-tuning, system deployment, +updating, and retirement are documented in a way that is accessible to relevant +stakeholders and easily understood. +• Liability: you should make sure that all parties involved in the generative AI project +lifecycle, from vendors and technical teams to system users, are acting lawfully and +understand their respective legal obligations. +As an end-user, being accountable means taking responsibility for a system’s outputs and +generated content and its potential consequences. This includes checking that these are +factual, truthful, non-discriminatory, non-harmful, and do not violate existing legal provisions, +guidelines, policies or the providers’ terms of use. It entails putting the necessary oversight +and human-in-the-loop processes in place to validate output in situations with high impact +or risk. Where these risks are too high, you must consider if generative AI should be used. +Ultimately, responsibility for any output or decision made or supported by an AI system +always rests with the public organisation. Where generative AI is bought commercially, +ensure that vendors understand their responsibilities and liabilities, put the required risk +mitigations in place and share all relevant information. Refer to the Buying generative AI +section for further guidance. +Practical recommendations +Follow existing legal provisions, guidelines and policies as well as the provider’s +terms of use when developing, deploying or using generative AI. 
+As an end-user, assume responsibility for output produced by generative AI tools +when used to support everyday tasks, such as drafting emails and reports. +Clearly define responsibilities, accountability, and liability across all actors involved +in the AI lifecycle. Where the generative AI is bought commercially, define detailed +responsibilities and liability contractually. +Nominate a Senior Responsible Owner who will be accountable for the use of +generative AI in a specific project. +Where generative AI is used in situations of high impact or risk, establish a +human-in-the-loop to oversee and validate outputs. +Adopt a risk-based approach to the use of AI-generated content and put +strategies in place to minimise the risk of inaccurate or harmful outputs. Where +the potential risks and harmful impacts are too high, consider whether human-in- +the-loop approaches offer sufficient mitigation or if generative AI should be used. +Provide routes for appeal and actionable redress and put feedback +channels into place. +Use assurance techniques to evaluate the performance of generative AI systems. +The CDEI AI assurance guide provides a useful starting point, and the CDEI +portfolio of AI assurance techniques offers real-world examples. diff --git a/benchmark/perfect_context/Classification of general-purpose AI models as general-purpose AI models with systemic risk.txt b/benchmark/perfect_context/Classification of general-purpose AI models as general-purpose AI models with systemic risk.txt deleted file mode 100644 index 43cd859..0000000 --- a/benchmark/perfect_context/Classification of general-purpose AI models as general-purpose AI models with systemic risk.txt +++ /dev/null @@ -1,13 +0,0 @@ -1. 
A general-purpose AI model shall be classified as a general-purpose AI model with systemic risk if it meets any of the -following conditions: -(a) it has high impact capabilities evaluated on the basis of appropriate technical tools and methodologies, including -indicators and benchmarks; -(b) based on a decision of the Commission, ex officio or following a qualified alert from the scientific panel, it has -capabilities or an impact equivalent to those set out in point (a) having regard to the criteria set out in Annex XIII. -2. A general-purpose AI model shall be presumed to have high impact capabilities pursuant to paragraph 1, point (a), -when the cumulative amount of computation used for its training measured in floating point operations is greater than -1025. -3. The Commission shall adopt delegated acts in accordance with Article 97 to amend the thresholds listed in -paragraphs 1 and 2 of this Article, as well as to supplement benchmarks and indicators in light of evolving technological -developments, such as algorithmic improvements or increased hardware efficiency, when necessary, for these thresholds to -reflect the state of the art. 
diff --git a/benchmark/perfect_context/Codes of practice.txt b/benchmark/perfect_context/Codes of practice.txt deleted file mode 100644 index be7daa0..0000000 --- a/benchmark/perfect_context/Codes of practice.txt +++ /dev/null @@ -1,39 +0,0 @@ - ELI: http://data.europa.eu/eli/reg/2024/1689/oj -(a) the means to ensure that the information referred to in Article 53(1), points (a) and (b), is kept up to date in light of -market and technological developments; -(b) the adequate level of detail for the summary about the content used for training; -(c) the identification of the type and nature of the systemic risks at Union level, including their sources, where appropriate; -(d) the measures, procedures and modalities for the assessment and management of the systemic risks at Union level, -including the documentation thereof, which shall be proportionate to the risks, take into consideration their severity -and probability and take into account the specific challenges of tackling those risks in light of the possible ways in -which such risks may emerge and materialise along the AI value chain. -3. The AI Office may invite all providers of general-purpose AI models, as well as relevant national competent -authorities, to participate in the drawing-up of codes of practice. Civil society organisations, industry, academia and other -relevant stakeholders, such as downstream providers and independent experts, may support the process. -4. The AI Office and the Board shall aim to ensure that the codes of practice clearly set out their specific objectives and -contain commitments or measures, including key performance indicators as appropriate, to ensure the achievement of -those objectives, and that they take due account of the needs and interests of all interested parties, including affected -persons, at Union level. -5. 
The AI Office shall aim to ensure that participants to the codes of practice report regularly to the AI Office on the -implementation of the commitments and the measures taken and their outcomes, including as measured against the key -performance indicators as appropriate. Key performance indicators and reporting commitments shall reflect differences in -size and capacity between various participants. -6. The AI Office and the Board shall regularly monitor and evaluate the achievement of the objectives of the codes of -practice by the participants and their contribution to the proper application of this Regulation. The AI Office and the Board -shall assess whether the codes of practice cover the obligations provided for in Articles 53 and 55, and shall regularly -monitor and evaluate the achievement of their objectives. They shall publish their assessment of the adequacy of the codes -of practice. -The Commission may, by way of an implementing act, approve a code of practice and give it a general validity within the -Union. That implementing act shall be adopted in accordance with the examination procedure referred to in Article 98(2). -7. The AI Office may invite all providers of general-purpose AI models to adhere to the codes of practice. For providers -of general-purpose AI models not presenting systemic risks this adherence may be limited to the obligations provided for in -Article 53, unless they declare explicitly their interest to join the full code. -8. The AI Office shall, as appropriate, also encourage and facilitate the review and adaptation of the codes of practice, in -particular in light of emerging standards. The AI Office shall assist in the assessment of available standards. -9. Codes of practice shall be ready at the latest by 2 May 2025. The AI Office shall take the necessary steps, including -inviting providers pursuant to paragraph 7. 
-If, by 2 August 2025, a code of practice cannot be finalised, or if the AI Office deems it is not adequate following its -assessment under paragraph 6 of this Article, the Commission may provide, by means of implementing acts, common rules -for the implementation of the obligations provided for in Articles 53 and 55, including the issues set out in paragraph 2 of -this Article. Those implementing acts shall be adopted in accordance with the examination procedure referred to in Article -98(2). diff --git a/benchmark/perfect_context/Compliant AI systems which present a risk.txt b/benchmark/perfect_context/Compliant AI systems which present a risk.txt deleted file mode 100644 index 0efeef3..0000000 --- a/benchmark/perfect_context/Compliant AI systems which present a risk.txt +++ /dev/null @@ -1,20 +0,0 @@ -1. Where, having performed an evaluation under Article 79, after consulting the relevant national public authority -referred to in Article 77(1), the market surveillance authority of a Member State finds that although a high-risk AI system -complies with this Regulation, it nevertheless presents a risk to the health or safety of persons, to fundamental rights, or to -other aspects of public interest protection, it shall require the relevant operator to take all appropriate measures to ensure -that the AI system concerned, when placed on the market or put into service, no longer presents that risk without undue -delay, within a period it may prescribe. -EN OJ L, 12.7.2024 -108/144 ELI: http://data.europa.eu/eli/reg/2024/1689/oj -2. The provider or other relevant operator shall ensure that corrective action is taken in respect of all the AI systems -concerned that it has made available on the Union market within the timeline prescribed by the market surveillance -authority of the Member State referred to in paragraph 1. -3. The Member States shall immediately inform the Commission and the other Member States of a finding under -paragraph 1. 
That information shall include all available details, in particular the data necessary for the identification of the -AI system concerned, the origin and the supply chain of the AI system, the nature of the risk involved and the nature and -duration of the national measures taken. -4. The Commission shall without undue delay enter into consultation with the Member States concerned and the -relevant operators, and shall evaluate the national measures taken. On the basis of the results of that evaluation, the -Commission shall decide whether the measure is justified and, where necessary, propose other appropriate measures. -5. The Commission shall immediately communicate its decision to the Member States concerned and to the relevant -operators. It shall also inform the other Member States. diff --git a/benchmark/perfect_context/Data and data governance.txt b/benchmark/perfect_context/Data and data governance.txt deleted file mode 100644 index ad49e6a..0000000 --- a/benchmark/perfect_context/Data and data governance.txt +++ /dev/null @@ -1,49 +0,0 @@ -1. High-risk AI systems which make use of techniques involving the training of AI models with data shall be developed -on the basis of training, validation and testing data sets that meet the quality criteria referred to in paragraphs 2 to 5 -whenever such data sets are used. -2. Training, validation and testing data sets shall be subject to data governance and management practices appropriate -for the intended purpose of the high-risk AI system. 
Those practices shall concern in particular: -(a) the relevant design choices; -(b) data collection processes and the origin of data, and in the case of personal data, the original purpose of the data -collection; -(c) relevant data-preparation processing operations, such as annotation, labelling, cleaning, updating, enrichment and -aggregation; -(d) the formulation of assumptions, in particular with respect to the information that the data are supposed to measure and -represent; -(e) an assessment of the availability, quantity and suitability of the data sets that are needed; -(f) examination in view of possible biases that are likely to affect the health and safety of persons, have a negative impact -on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence -inputs for future operations; -(g) appropriate measures to detect, prevent and mitigate possible biases identified according to point (f); -(h) the identification of relevant data gaps or shortcomings that prevent compliance with this Regulation, and how those -gaps and shortcomings can be addressed. -3. Training, validation and testing data sets shall be relevant, sufficiently representative, and to the best extent possible, -free of errors and complete in view of the intended purpose. They shall have the appropriate statistical properties, including, -where applicable, as regards the persons or groups of persons in relation to whom the high-risk AI system is intended to be -used. Those characteristics of the data sets may be met at the level of individual data sets or at the level of a combination -thereof. -4. Data sets shall take into account, to the extent required by the intended purpose, the characteristics or elements that -are particular to the specific geographical, contextual, behavioural or functional setting within which the high-risk AI -system is intended to be used. 
-OJ L, 12.7.2024 EN -ELI: http://data.europa.eu/eli/reg/2024/1689/oj 57/144 -5. To the extent that it is strictly necessary for the purpose of ensuring bias detection and correction in relation to the -high-risk AI systems in accordance with paragraph (2), points (f) and (g) of this Article, the providers of such systems may -exceptionally process special categories of personal data, subject to appropriate safeguards for the fundamental rights and -freedoms of natural persons. In addition to the provisions set out in Regulations (EU) 2016/679 and (EU) 2018/1725 and -Directive (EU) 2016/680, all the following conditions must be met in order for such processing to occur: -(a) the bias detection and correction cannot be effectively fulfilled by processing other data, including synthetic or -anonymised data; -(b) the special categories of personal data are subject to technical limitations on the re-use of the personal data, and -state-of-the-art security and privacy-preserving measures, including pseudonymisation; -(c) the special categories of personal data are subject to measures to ensure that the personal data processed are secured, -protected, subject to suitable safeguards, including strict controls and documentation of the access, to avoid misuse and -ensure that only authorised persons have access to those personal data with appropriate confidentiality obligations; -(d) the special categories of personal data are not to be transmitted, transferred or otherwise accessed by other parties; -(e) the special categories of personal data are deleted once the bias has been corrected or the personal data has reached the -end of its retention period, whichever comes first; -(f) the records of processing activities pursuant to Regulations (EU) 2016/679 and (EU) 2018/1725 and Directive (EU) -2016/680 include the reasons why the processing of special categories of personal data was strictly necessary to detect -and correct biases, and why that objective could not be 
achieved by processing other data. -6. For the development of high-risk AI systems not using techniques involving the training of AI models, paragraphs 2 -to 5 apply only to the testing data sets. diff --git a/benchmark/perfect_context/EU declaration of conformity.txt b/benchmark/perfect_context/EU declaration of conformity.txt deleted file mode 100644 index 58660d6..0000000 --- a/benchmark/perfect_context/EU declaration of conformity.txt +++ /dev/null @@ -1,20 +0,0 @@ -1. The provider shall draw up a written machine readable, physical or electronically signed EU declaration of conformity -for each high-risk AI system, and keep it at the disposal of the national competent authorities for 10 years after the -high-risk AI system has been placed on the market or put into service. The EU declaration of conformity shall identify the -high-risk AI system for which it has been drawn up. A copy of the EU declaration of conformity shall be submitted to the -relevant national competent authorities upon request. -EN OJ L, 12.7.2024 -80/144 ELI: http://data.europa.eu/eli/reg/2024/1689/oj -2. The EU declaration of conformity shall state that the high-risk AI system concerned meets the requirements set out in -Section 2. The EU declaration of conformity shall contain the information set out in Annex V, and shall be translated into -a language that can be easily understood by the national competent authorities of the Member States in which the high-risk -AI system is placed on the market or made available. -3. Where high-risk AI systems are subject to other Union harmonisation legislation which also requires an EU -declaration of conformity, a single EU declaration of conformity shall be drawn up in respect of all Union law applicable to -the high-risk AI system. The declaration shall contain all the information required to identify the Union harmonisation -legislation to which the declaration relates. -4. 
By drawing up the EU declaration of conformity, the provider shall assume responsibility for compliance with the -requirements set out in Section 2. The provider shall keep the EU declaration of conformity up-to-date as appropriate. -5. The Commission is empowered to adopt delegated acts in accordance with Article 97 in order to amend Annex V by -updating the content of the EU declaration of conformity set out in that Annex, in order to introduce elements that become -necessary in light of technical progress. diff --git a/benchmark/perfect_context/Establishment and structure of the European Artificial Intelligence Board.txt b/benchmark/perfect_context/Establishment and structure of the European Artificial Intelligence Board.txt deleted file mode 100644 index c3e0f6e..0000000 --- a/benchmark/perfect_context/Establishment and structure of the European Artificial Intelligence Board.txt +++ /dev/null @@ -1,29 +0,0 @@ -1. A European Artificial Intelligence Board (the ‘Board’) is hereby established. -2. The Board shall be composed of one representative per Member State. The European Data Protection Supervisor shall -participate as observer. The AI Office shall also attend the Board’s meetings, without taking part in the votes. Other national -and Union authorities, bodies or experts may be invited to the meetings by the Board on a case by case basis, where the -issues discussed are of relevance for them. -3. Each representative shall be designated by their Member State for a period of three years, renewable once. -4. 
Member States shall ensure that their representatives on the Board: -(a) have the relevant competences and powers in their Member State so as to contribute actively to the achievement of the -Board’s tasks referred to in Article 66; -(b) are designated as a single contact point vis-à-vis the Board and, where appropriate, taking into account Member States’ -needs, as a single contact point for stakeholders; -(c) are empowered to facilitate consistency and coordination between national competent authorities in their Member State -as regards the implementation of this Regulation, including through the collection of relevant data and information for -the purpose of fulfilling their tasks on the Board. -5. The designated representatives of the Member States shall adopt the Board’s rules of procedure by a two-thirds -majority. The rules of procedure shall, in particular, lay down procedures for the selection process, the duration of the -mandate of, and specifications of the tasks of, the Chair, detailed arrangements for voting, and the organisation of the -Board’s activities and those of its sub-groups. -6. The Board shall establish two standing sub-groups to provide a platform for cooperation and exchange among market -surveillance authorities and notifying authorities about issues related to market surveillance and notified bodies respectively. -The standing sub-group for market surveillance should act as the administrative cooperation group (ADCO) for this -Regulation within the meaning of Article 30 of Regulation (EU) 2019/1020. -The Board may establish other standing or temporary sub-groups as appropriate for the purpose of examining specific -issues. Where appropriate, representatives of the advisory forum referred to in Article 67 may be invited to such sub-groups -or to specific meetings of those subgroups as observers. -7. The Board shall be organised and operated so as to safeguard the objectivity and impartiality of its activities. -8. 
The Board shall be chaired by one of the representatives of the Member States. The AI Office shall provide the -secretariat for the Board, convene the meetings upon request of the Chair, and prepare the agenda in accordance with the -tasks of the Board pursuant to this Regulation and its rules of procedure. diff --git a/benchmark/perfect_context/Limitations of generative AI and LLMs.txt b/benchmark/perfect_context/Limitations of generative AI and LLMs.txt new file mode 100644 index 0000000..b9d3e14 --- /dev/null +++ b/benchmark/perfect_context/Limitations of generative AI and LLMs.txt @@ -0,0 +1,40 @@ +LLMs predict the next word in a sequence. They don’t understand the content or meaning +of the words beyond how likely they are to be used in response to a particular question. +This means that even though LLMs can produce plausible responses to requests, there are +limitations on what they can reliably do. +You need to be aware of these limitations and have checks and assurance in place when +using generative AI in your organisation. +• Hallucination (also called confabulation): LLMs are primarily designed to prioritise the +appearance of being plausible rather than focusing on ensuring absolute accuracy, +frequently resulting in the creation of content that appears plausible but may actually be +factually incorrect. +• Critical thinking and judgement: although LLMs can give the appearance of reasoning, +they are simply predicting the next most plausible word in their output, and may produce +inaccurate or poorly-reasoned conclusions. +• Sensitive or ethical context: LLMs can generate offensive, biased, or inappropriate +content if not properly guided, as they will replicate any bias present in the data they +were trained on. +• Domain expertise: unless specifically trained on specialist data, LLMs are not true +domain experts. 
On their own, they are not a substitute for professional advice, +especially in legal, medical, or other critical areas where precise and contextually relevant +information is essential. +• Personal experience and context: LLMs lack personal experiences and emotions. +Although their outputs may appear as if they come from a person, they do not have true +understanding or a consciousness. +• Dynamic real-time information retrieval: LLMs do not always have real-time access to +the internet or data outside their training set. However, this feature of LLM products is +changing. As of October 2023, ChatGPT, Bard and Bing have been modified to include +access to real-time internet data in their results. +• Short-term memory: LLMs have a limited context window. They might lose track of the +context of a conversation if it’s too long, leading to incoherent responses. +• Explainability: generative AI is based on neural networks, which are so-called ‘black +boxes’. This makes it difficult or impossible to explain the inner workings of the model +which has potential implications if in the future you are challenged to justify decisioning +or guidance based on the model. +These limitations mean that there are types of use cases where you should currently avoid +using generative AI, such as safety-of-life systems or those involving fully automated +decision-making which affects individuals. +However, the capabilities and limitations of generative AI solutions are rapidly changing, +and solution providers are continuously striving to overcome these limitations. This means +that you should make sure that you understand the features of the products and services +you are using and how they are expected to change. diff --git a/benchmark/perfect_context/Penalties.txt b/benchmark/perfect_context/Penalties.txt deleted file mode 100644 index bfb9d1c..0000000 --- a/benchmark/perfect_context/Penalties.txt +++ /dev/null @@ -1,57 +0,0 @@ -1. 
In accordance with the terms and conditions laid down in this Regulation, Member States shall lay down the rules on -penalties and other enforcement measures, which may also include warnings and non-monetary measures, applicable to -infringements of this Regulation by operators, and shall take all measures necessary to ensure that they are properly and -effectively implemented, thereby taking into account the guidelines issued by the Commission pursuant to Article 96. The -penalties provided for shall be effective, proportionate and dissuasive. They shall take into account the interests of SMEs, -including start-ups, and their economic viability. -2. The Member States shall, without delay and at the latest by the date of entry into application, notify the Commission -of the rules on penalties and of other enforcement measures referred to in paragraph 1, and shall notify it, without delay, of -any subsequent amendment to them. -3. Non-compliance with the prohibition of the AI practices referred to in Article 5 shall be subject to administrative -fines of up to EUR 35 000 000 or, if the offender is an undertaking, up to 7 % of its total worldwide annual turnover for the -preceding financial year, whichever is higher. -4. 
Non-compliance with any of the following provisions related to operators or notified bodies, other than those laid -down in Articles 5, shall be subject to administrative fines of up to EUR 15 000 000 or, if the offender is an undertaking, up -to 3 % of its total worldwide annual turnover for the preceding financial year, whichever is higher: -(a) obligations of providers pursuant to Article 16; -(b) obligations of authorised representatives pursuant to Article 22; -(c) obligations of importers pursuant to Article 23; -(d) obligations of distributors pursuant to Article 24; -(e) obligations of deployers pursuant to Article 26; -(f) requirements and obligations of notified bodies pursuant to Article 31, Article 33(1), (3) and (4) or Article 34; -(g) transparency obligations for providers and deployers pursuant to Article 50. -5. The supply of incorrect, incomplete or misleading information to notified bodies or national competent authorities in -reply to a request shall be subject to administrative fines of up to EUR 7 500 000 or, if the offender is an undertaking, up to -1 % of its total worldwide annual turnover for the preceding financial year, whichever is higher. -6. In the case of SMEs, including start-ups, each fine referred to in this Article shall be up to the percentages or amount -referred to in paragraphs 3, 4 and 5, whichever thereof is lower. -7. 
When deciding whether to impose an administrative fine and when deciding on the amount of the administrative fine -in each individual case, all relevant circumstances of the specific situation shall be taken into account and, as appropriate, -regard shall be given to the following: -(a) the nature, gravity and duration of the infringement and of its consequences, taking into account the purpose of the AI -system, as well as, where appropriate, the number of affected persons and the level of damage suffered by them; -(b) whether administrative fines have already been applied by other market surveillance authorities to the same operator for -the same infringement; -(c) whether administrative fines have already been applied by other authorities to the same operator for infringements of -other Union or national law, when such infringements result from the same activity or omission constituting a relevant -infringement of this Regulation; -(d) the size, the annual turnover and market share of the operator committing the infringement; -(e) any other aggravating or mitigating factor applicable to the circumstances of the case, such as financial benefits gained, -or losses avoided, directly or indirectly, from the infringement; -(f) the degree of cooperation with the national competent authorities, in order to remedy the infringement and mitigate the -possible adverse effects of the infringement; -(g) the degree of responsibility of the operator taking into account the technical and organisational measures implemented -by it; -(h) the manner in which the infringement became known to the national competent authorities, in particular whether, and -if so to what extent, the operator notified the infringement; -(i) the intentional or negligent character of the infringement; -(j) any action taken by the operator to mitigate the harm suffered by the affected persons. -8. 
Each Member State shall lay down rules on to what extent administrative fines may be imposed on public authorities -and bodies established in that Member State. -9. Depending on the legal system of the Member States, the rules on administrative fines may be applied in such -a manner that the fines are imposed by competent national courts or by other bodies, as applicable in those Member States. -The application of such rules in those Member States shall have an equivalent effect. -10. The exercise of powers under this Article shall be subject to appropriate procedural safeguards in accordance with -Union and national law, including effective judicial remedies and due process. -11. Member States shall, on an annual basis, report to the Commission about the administrative fines they have issued -during that year, in accordance with this Article, and about any related litigation or judicial proceedings diff --git a/benchmark/perfect_context/Procurement in an emerging market.txt b/benchmark/perfect_context/Procurement in an emerging market.txt new file mode 100644 index 0000000..547eb7a --- /dev/null +++ b/benchmark/perfect_context/Procurement in an emerging market.txt @@ -0,0 +1,28 @@ +Commercial agreements +AI is an emerging market. As well as rapidly evolving technology, there are ongoing changes +in the supply base and the products and services it offers. DPSs offer flexibility for new +suppliers to join, which often complement these dynamics well for buyers. +Any public sector buyers interested in shaping CCS’s longer term commercial agreement +portfolio should express their interest via info@crowncommercial.gov.uk +Regulation and policy +Regulation and policy will also evolve to keep pace. However, there are already a number of +legal and regulatory provisions which are relevant to the use of AI technologies. 
+• UK data protection law: regulation around automated decision making, processing +personal data, processing for the purpose of developing and training AI technologies. In +November 2022, a new Procurement Policy Note was published to provide an update to +this: PPN 03/22 Updated guidance on data protection legislation. +• Online Safety Act: provisions concerning design and use of algorithms are to be +included in a new set of laws to protect children and adults online. It will make social +media companies more responsible for their users’ safety on their platforms. +• A pro-innovation approach to AI regulation: this white paper published in March 2023, +sets out early steps towards establishing a regulatory regime for AI. The white paper +outlines a proportionate pro-innovation framework, including five principles to guide +responsible AI innovation in all sectors. +• Centre for Data Ethics and Innovation (CDEI) AI assurance techniques: the portfolio +of AI assurance techniques has been developed by the CDEI, initially in collaboration with +techUK. The portfolio is useful for anybody involved in designing, developing, deploying +or procuring AI-enabled systems. It shows examples of AI assurance techniques being +used in the real-world to support the development of trustworthy AI. +Further guidance is also available from the Information Commissioner’s Office, Equality +and Human Rights Commission, Medicines and Healthcare products Regulation +Authority and the Health and Safety Executive. diff --git a/benchmark/perfect_context/Reporting of serious incidents.txt b/benchmark/perfect_context/Reporting of serious incidents.txt deleted file mode 100644 index 74bc4c6..0000000 --- a/benchmark/perfect_context/Reporting of serious incidents.txt +++ /dev/null @@ -1,40 +0,0 @@ -1. Providers of high-risk AI systems placed on the Union market shall report any serious incident to the market -surveillance authorities of the Member States where that incident occurred. 
-OJ L, 12.7.2024 EN -ELI: http://data.europa.eu/eli/reg/2024/1689/oj 101/144 -2. The report referred to in paragraph 1 shall be made immediately after the provider has established a causal link -between the AI system and the serious incident or the reasonable likelihood of such a link, and, in any event, not later than -15 days after the provider or, where applicable, the deployer, becomes aware of the serious incident. -The period for the reporting referred to in the first subparagraph shall take account of the severity of the serious incident. -3. Notwithstanding paragraph 2 of this Article, in the event of a widespread infringement or a serious incident as -defined in Article 3, point (49)(b), the report referred to in paragraph 1 of this Article shall be provided immediately, and -not later than two days after the provider or, where applicable, the deployer becomes aware of that incident. -4. Notwithstanding paragraph 2, in the event of the death of a person, the report shall be provided immediately after the -provider or the deployer has established, or as soon as it suspects, a causal relationship between the high-risk AI system and -the serious incident, but not later than 10 days after the date on which the provider or, where applicable, the deployer -becomes aware of the serious incident. -5. Where necessary to ensure timely reporting, the provider or, where applicable, the deployer, may submit an initial -report that is incomplete, followed by a complete report. -6. Following the reporting of a serious incident pursuant to paragraph 1, the provider shall, without delay, perform the -necessary investigations in relation to the serious incident and the AI system concerned. This shall include a risk assessment -of the incident, and corrective action. 
-The provider shall cooperate with the competent authorities, and where relevant with the notified body concerned, during -the investigations referred to in the first subparagraph, and shall not perform any investigation which involves altering the -AI system concerned in a way which may affect any subsequent evaluation of the causes of the incident, prior to informing -the competent authorities of such action. -7. Upon receiving a notification related to a serious incident referred to in Article 3, point (49)(c), the relevant market -surveillance authority shall inform the national public authorities or bodies referred to in Article 77(1). The Commission -shall develop dedicated guidance to facilitate compliance with the obligations set out in paragraph 1 of this Article. That -guidance shall be issued by 2 August 2025, and shall be assessed regularly. -8. The market surveillance authority shall take appropriate measures, as provided for in Article 19 of Regulation (EU) -2019/1020, within seven days from the date it received the notification referred to in paragraph 1 of this Article, and shall -follow the notification procedures as provided in that Regulation. -9. For high-risk AI systems referred to in Annex III that are placed on the market or put into service by providers that are -subject to Union legislative instruments laying down reporting obligations equivalent to those set out in this Regulation, the -notification of serious incidents shall be limited to those referred to in Article 3, point (49)(c). -10. For high-risk AI systems which are safety components of devices, or are themselves devices, covered by Regulations -(EU) 2017/745 and (EU) 2017/746, the notification of serious incidents shall be limited to those referred to in Article 3, -point (49)(c) of this Regulation, and shall be made to the national competent authority chosen for that purpose by the -Member States where the incident occurred. -11. 
National competent authorities shall immediately notify the Commission of any serious incident, whether or not -they have taken action on it, in accordance with Article 20 of Regulation (EU) 2019/1020 diff --git a/benchmark/perfect_context/Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes.txt b/benchmark/perfect_context/Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes.txt deleted file mode 100644 index e69de29..0000000 diff --git a/benchmark/perfect_context/Transparency obligations for providers and deployers of certain AI systems.txt b/benchmark/perfect_context/Transparency obligations for providers and deployers of certain AI systems.txt deleted file mode 100644 index 9ca892f..0000000 --- a/benchmark/perfect_context/Transparency obligations for providers and deployers of certain AI systems.txt +++ /dev/null @@ -1,44 +0,0 @@ -1. Providers shall ensure that AI systems intended to interact directly with natural persons are designed and developed in -such a way that the natural persons concerned are informed that they are interacting with an AI system, unless this is -obvious from the point of view of a natural person who is reasonably well-informed, observant and circumspect, taking -into account the circumstances and the context of use. This obligation shall not apply to AI systems authorised by law to -detect, prevent, investigate or prosecute criminal offences, subject to appropriate safeguards for the rights and freedoms of -third parties, unless those systems are available for the public to report a criminal offence. -2. Providers of AI systems, including general-purpose AI systems, generating synthetic audio, image, video or text -content, shall ensure that the outputs of the AI system are marked in a machine-readable format and detectable as -artificially generated or manipulated. 
Providers shall ensure their technical solutions are effective, interoperable, robust and -reliable as far as this is technically feasible, taking into account the specificities and limitations of various types of content, -the costs of implementation and the generally acknowledged state of the art, as may be reflected in relevant technical -standards. This obligation shall not apply to the extent the AI systems perform an assistive function for standard editing or -do not substantially alter the input data provided by the deployer or the semantics thereof, or where authorised by law to -detect, prevent, investigate or prosecute criminal offences. -3. Deployers of an emotion recognition system or a biometric categorisation system shall inform the natural persons -exposed thereto of the operation of the system, and shall process the personal data in accordance with Regulations (EU) -2016/679 and (EU) 2018/1725 and Directive (EU) 2016/680, as applicable. This obligation shall not apply to AI systems -used for biometric categorisation and emotion recognition, which are permitted by law to detect, prevent or investigate -criminal offences, subject to appropriate safeguards for the rights and freedoms of third parties, and in accordance with -Union law. -4. Deployers of an AI system that generates or manipulates image, audio or video content constituting a deep fake, shall -disclose that the content has been artificially generated or manipulated. This obligation shall not apply where the use is -authorised by law to detect, prevent, investigate or prosecute criminal offence. Where the content forms part of an evidently -artistic, creative, satirical, fictional or analogous work or programme, the transparency obligations set out in this paragraph -are limited to disclosure of the existence of such generated or manipulated content in an appropriate manner that does not -hamper the display or enjoyment of the work. 
-Deployers of an AI system that generates or manipulates text which is published with the purpose of informing the public -on matters of public interest shall disclose that the text has been artificially generated or manipulated. This obligation shall -not apply where the use is authorised by law to detect, prevent, investigate or prosecute criminal offences or where the -AI-generated content has undergone a process of human review or editorial control and where a natural or legal person -holds editorial responsibility for the publication of the content. -EN OJ L, 12.7.2024 -82/144 ELI: http://data.europa.eu/eli/reg/2024/1689/oj -5. The information referred to in paragraphs 1 to 4 shall be provided to the natural persons concerned in a clear and -distinguishable manner at the latest at the time of the first interaction or exposure. The information shall conform to the -applicable accessibility requirements. -6. Paragraphs 1 to 4 shall not affect the requirements and obligations set out in Chapter III, and shall be without -prejudice to other transparency obligations laid down in Union or national law for deployers of AI systems. -7. The AI Office shall encourage and facilitate the drawing up of codes of practice at Union level to facilitate the effective -implementation of the obligations regarding the detection and labelling of artificially generated or manipulated content. -The Commission may adopt implementing acts to approve those codes of practice in accordance with the procedure laid -down in Article 56 (6). If it deems the code is not adequate, the Commission may adopt an implementing act specifying -common rules for the implementation of those obligations in accordance with the examination procedure laid down in -Article 98(2). 
diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index b942988..6739d1b 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -20,16 +20,6 @@ https://arxiv.org/pdf/2106.09685.pdf,4 OUR METHOD,Does LoRA work with any neural https://arxiv.org/pdf/2106.09685.pdf,LORA ABSTRACT,"By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x",C https://arxiv.org/pdf/2106.09685.pdf,1 INTRODUCTION,"In billions, how many trainable parameters does GPT-3 have?",175 https://arxiv.org/pdf/2106.09685.pdf,1 INTRODUCTION,Does LoRA introduce additional inference latency compared to full fine-tuning?,NO -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Data and data governance,"what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size",B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Classification of general-purpose AI models as general-purpose AI models with systemic risk,"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25",C -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Transparency obligations for providers and deployers of certain AI systems,"What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.",B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Reporting of serious incidents,How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? 
-A: 3 days -B: 7 days -C: 14 days,B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Testing of high-risk AI systems in real world conditions outside AI regulatory sandboxes,"What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months",B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Penalties,"What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher",A -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Codes of practice,By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025,A -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Compliant AI systems which present a risk,What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? -A: 1 month -B: 2 months -C: Immediately,C -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,EU declaration of conformity,"How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years",B -https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689,Establishment and structure of the European Artificial Intelligence Board,"How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once",B https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,SECTION I. INTRODUCTION,"According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license",B https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER?,"how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000",A https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,SECTION I. INTRODUCTION,Does open access eliminate price barriers?,YES @@ -68,7 +58,7 @@ https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rule https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,EXPEDITION PHASE,How many victory points you get from each conquered island?,1 https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,CLEANUP PHASE,Is there a cleanup phase in the final round?,NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOCATION ABILITIES,How many victory points are granted by a built Field Location card that work as an upgrade?,1 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOCATION ABILITIES,Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOCATION ABILITIES,Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right 
away?,YES https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,RAID,Can you use the raid action without a Raze token?,NO https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,GAME END,Can the game end in a tie?,YES https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,GAME END,"If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie",A @@ -83,10 +73,10 @@ https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,6.1.1. Compilation https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,6.1.1. Compilation Workflow,"What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.",C https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,6.1.1. Compilation Workflow,"What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.",A https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.",C -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.",A +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.",A https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.",B https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.",C -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,23.1. What is Lazy Loading?,"What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.",A +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,23.1. What is Lazy Loading?,"What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.",A https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,23.1. What is Lazy Loading?,"Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?",NO https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Risk Perception,"which type of risk was identified as the leading concern globally? 
-A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.",B https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Risk Perception,"In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.",C @@ -98,3 +88,17 @@ https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024 https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,U.S. Regulation,"Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance",B https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Europe,"Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom",B https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Europe,"Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.",C +https://arxiv.org/pdf/2302.13971,"2.1 Pre-training Data","What proportion of the pre-training data was from Github? -A: 4.5% -B: 15.0% -C: 4%",A +https://arxiv.org/pdf/2302.13971,"2.1 Pre-training Data","How many languages did the Wikipedia data cover?",20 +https://arxiv.org/pdf/2302.13971,"2.3 Optimizer","What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD",A +https://arxiv.org/pdf/2302.13971,"2.3 Optimizer","What value was used for the weight decay?",0.1 +https://arxiv.org/pdf/2302.13971,"3 Main results","How many benchmarks were tested?",20 +https://arxiv.org/pdf/2302.13971,"3 Main results","Was the model compared against GPT-4?",NO +https://arxiv.org/pdf/2302.13971,"5 Bias, Toxicity and Misinformation","Can LLMs re-produce biases that exist in training data?",YES +https://arxiv.org/pdf/2302.13971,"5 Bias, Toxicity and Misinformation","Do authors consider the evaluations enough to fully comprehend the risks of the model?",NO +https://arxiv.org/pdf/2302.13971,"5.2 CrowS-Pairs","Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?",NO +https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Limitations of generative AI and LLMs","Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization",C +https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Limitations of generative AI and LLMs","Can LLMs be used as an alternative to visiting a doctor?",NO +https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Procurement in an emerging market","Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? -A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?",C +https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Accountability and responsibility","what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. 
-B: Answerability, auditability, and liability. -C: Speed, scalability, and security.",B +https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Accountability and responsibility","What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. -C: Rely on the provider's terms of use for all compliance and accuracy checks.",B From 4f8f82a457f68c85282ae177927b78ece33ea59c Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 3 Feb 2025 11:38:20 +0100 Subject: [PATCH 098/120] Update --- .../qwen_2_5_7B_find_retrieve_answer.ipynb | 1412 +++++------------ 1 file changed, 418 insertions(+), 994 deletions(-) diff --git a/benchmark/qwen_2_5_7B_find_retrieve_answer.ipynb b/benchmark/qwen_2_5_7B_find_retrieve_answer.ipynb index edeca37..416d0c5 100644 --- a/benchmark/qwen_2_5_7B_find_retrieve_answer.ipynb +++ b/benchmark/qwen_2_5_7B_find_retrieve_answer.ipynb @@ -38,87 +38,30 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tk9uiWGn81j_", + "outputId": "b75d8f37-168a-4ca7-dd23-121051d6c0fb" + }, + "outputs": [], + "source": [ + "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2HoyF-xbHEgv", - "outputId": "c67c89e2-0140-432a-c741-a2546685176b" + "outputId": "a44a894c-1079-470f-d323-578d05238768" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-zpym8juf\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-zpym8juf\n", - " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", - " Switched to a new branch '5-add-benchmark'\n", - " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 7b9c96cd5fc3cd34781aa26e2519a6f4731feedc\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.7.0)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.27.1)\n", - "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.7.3)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (2.10.6)\n", - "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (0.0.17)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (6.0.2)\n", - "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev84+g7b9c96c) (3.11.0)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages 
(from structured-qa==0.3.3.dev84+g7b9c96c) (1.41.1)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev84+g7b9c96c) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev84+g7b9c96c) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev84+g7b9c96c) (2.27.2)\n", - "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev84+g7b9c96c) (1.25.2)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) 
(1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.10.2)\n", - "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (6.0.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.1.44)\n", - "Requirement already satisfied: 
pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.9.1)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (6.3.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.23.0)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from 
requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev84+g7b9c96c) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (5.0.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (24.3.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.36.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) 
(0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev84+g7b9c96c) (1.17.0)\n" - ] - } - ], + "outputs": [], "source": [ "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" ] @@ -126,32 +69,23 @@ { "cell_type": "code", "execution_count": 2, + "metadata": { + "id": "lJs7zN4N8vhO" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "p_hsSGafHEgw", - "outputId": "b78ec720-0315-48fe-9d15-dc3400ca69d7" + "outputId": "03e8238b-d5f4-4ced-99e7-041a4263b92d" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2025-01-29 13:54:23-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 21734 (21K) [text/plain]\n", - "Saving to: ‘structured_qa.csv.3’\n", - "\n", - "\rstructured_qa.csv.3 0%[ ] 0 --.-KB/s \rstructured_qa.csv.3 100%[===================>] 21.22K --.-KB/s in 0.002s \n", - "\n", - "2025-01-29 13:54:24 (9.19 MB/s) - ‘structured_qa.csv.3’ saved [21734/21734]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" ] @@ -167,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": { "id": "5bLJE4U7HEgx" }, @@ -180,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": { "id": "y3yUsRDWHEgy" }, @@ -200,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": { "id": "n6d8F7cYHEgy" }, @@ -213,23 +147,19 @@ "\n", "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", "You are a rigorous assistant answering questions.\n", - "You only answer based on the current information available.\n", - "The current information available is:\n", + "You must only answer based on the current information available which is:\n", "\n", "```\n", "{CURRENT_INFO}\n", "```\n", "\n", - "The answer must be in one of the following formats:\n", + "If the current information available not enough to answer the question,\n", + "you must return \"I need more info\" srting and nothing else:\n", + "\n", + "If the current information is enough to answer, you must return one of the following formats:\n", "- YES/NO (for boolean questions)\n", - "Is the model an LLM?\n", - "YES\n", "- Number (for numeric questions)\n", - "How many layers does the model have?\n", - "12\n", "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", - "C\n", "\"\"\"\n", "\n", "\n", @@ -272,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": { "id": "9zx8nCaZHEgz" }, @@ -283,9 +213,27 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { - "id": "U4R84hHRHEgz" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 156, + "referenced_widgets": [ + "79ed82485b234525976e17fc9ebe47de", + "c35c62230dfe4c32b2c45632e691cd46", + "cfeae74e2e3f44e9b2e2dbbb9c756bd8", + "082c72195b2b4a13b0ca7cecf368cebf", + "b09e7d3ff8cd498aa45faca3b390ac70", + "5d98e9f3ef544194b6a99d94c1b56cd2", + "83e32e91aff04efda8ca1efad07249cb", + "d320191f9d7146f8b8ccd10e7ad6dd6d", + "575b8c7136f846c9ab62e7a796350506", + "96da7d2d05834aa98fe41ad43e5c2c62", + "e2ae5284ec9349a4a920585c12419e33" + ] + }, + "id": "U4R84hHRHEgz", + "outputId": "3c083d53-79db-4f8c-ef1a-a12754cb227f" }, "outputs": [], "source": [ @@ -305,453 +253,15 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 + "base_uri": "https://localhost:8080/" }, "id": "-qtPf9RmHEgz", - "outputId": "bc1ab1b7-e8d2-4fb0-981b-883e7eeaa794" + "outputId": "723ac0e9-23c4-470f-df81-1ec4065df532" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-01-29 13:54:26.672\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:26.682\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:26.686\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:26.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:26.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:27.691\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 988.13ms\n", - "\u001b[32m2025-01-29 13:54:27.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:27.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 540.84ms\n", - "\u001b[32m2025-01-29 13:54:28.263\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:28.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:28.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:28.870\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 572.93ms\n", - "\u001b[32m2025-01-29 13:54:28.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:28.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:29.418\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 521.13ms\n", - "\u001b[32m2025-01-29 13:54:29.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:29.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? 
-A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 653.13ms\n", - "\u001b[32m2025-01-29 13:54:30.095\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:30.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:30.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:30.948\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 829.04ms\n", - "\u001b[32m2025-01-29 13:54:30.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:30.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:31.576\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 615.85ms\n", - "\u001b[32m2025-01-29 13:54:31.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:31.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. 
-C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:32.208\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 613.00ms\n", - "\u001b[32m2025-01-29 13:54:32.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:32.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:32.858\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 626.48ms\n", - "\u001b[32m2025-01-29 13:54:32.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:32.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:33.431\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 554.99ms\n", - "\u001b[32m2025-01-29 13:54:33.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:33.466\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:33.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 1706.03762.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:33.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:33.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:34.056\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 568.57ms\n", - "\u001b[32m2025-01-29 13:54:34.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:34.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 689.37ms\n", - "\u001b[32m2025-01-29 13:54:34.770\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:34.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:34.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:35.397\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 613.50ms\n", - "\u001b[32m2025-01-29 13:54:35.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:35.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 582.09ms\n", - "\u001b[32m2025-01-29 13:54:35.992\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST 
https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:35.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:35.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.49ms\n", - "\u001b[32m2025-01-29 13:54:36.611\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:36.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:36.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:37.234\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 610.46ms\n", - "\u001b[32m2025-01-29 13:54:37.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:37.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.15ms\n", - "\u001b[32m2025-01-29 13:54:37.831\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST 
https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:37.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:37.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 609.22ms\n", - "\u001b[32m2025-01-29 13:54:38.452\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:38.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:38.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.94ms\n", - "\u001b[32m2025-01-29 13:54:39.100\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:39.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:39.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 661.72ms\n", - "\u001b[32m2025-01-29 13:54:39.770\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:39.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:39.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:40.339\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 558.40ms\n", - "\u001b[32m2025-01-29 13:54:40.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:40.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:40.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2106.09685.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:40.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:40.359\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:41.023\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.57ms\n", - "\u001b[32m2025-01-29 13:54:41.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:41.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:41.666\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 635.63ms\n", - "\u001b[32m2025-01-29 13:54:41.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:41.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.74ms\n", - "\u001b[32m2025-01-29 13:54:42.310\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:42.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:42.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:42.904\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 584.31ms\n", - "\u001b[32m2025-01-29 13:54:42.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:42.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:42.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2201.11903.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:42.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:42.920\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.44ms\n", - "\u001b[32m2025-01-29 13:54:43.588\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:43.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:43.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 481.89ms\n", - "\u001b[32m2025-01-29 13:54:44.083\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:44.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:44.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 660.08ms\n", - "\u001b[32m2025-01-29 13:54:44.754\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:44.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:44.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 557.92ms\n", - "\u001b[32m2025-01-29 13:54:45.329\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:45.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:45.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 636.75ms\n", - "\u001b[32m2025-01-29 13:54:45.976\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:45.978\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:45.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 608.68ms\n", - "\u001b[32m2025-01-29 13:54:46.600\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:46.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:46.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:47.296\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 686.39ms\n", - "\u001b[32m2025-01-29 13:54:47.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:47.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.93ms\n", - "\u001b[32m2025-01-29 13:54:47.915\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST 
https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:47.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:47.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:47.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2210.05189.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:47.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:47.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 635.76ms\n", - "\u001b[32m2025-01-29 13:54:48.574\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:48.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:48.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 582.96ms\n", - "\u001b[32m2025-01-29 13:54:49.168\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:49.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:49.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:49.864\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 685.88ms\n", - "\u001b[32m2025-01-29 13:54:49.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:49.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 559.12ms\n", - "\u001b[32m2025-01-29 13:54:50.436\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:50.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:50.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.69ms\n", - "\u001b[32m2025-01-29 13:54:51.056\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:51.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:51.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 608.10ms\n", - "\u001b[32m2025-01-29 13:54:51.676\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:51.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:51.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:51.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:51.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:51.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:52.359\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.73ms\n", - "\u001b[32m2025-01-29 13:54:52.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:52.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:52.956\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:52.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:52.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 588.01ms\n", - "\u001b[32m2025-01-29 13:54:53.555\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:53.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 585.27ms\n", - "\u001b[32m2025-01-29 13:54:53.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:54.227\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.84ms\n", - "\u001b[32m2025-01-29 13:54:54.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:54.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 633.02ms\n", - "\u001b[32m2025-01-29 13:54:54.872\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:54.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:54.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 557.19ms\n", - "\u001b[32m2025-01-29 13:54:55.438\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:55.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:55.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:55.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:55.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:55.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 633.98ms\n", - "\u001b[32m2025-01-29 13:54:56.098\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:56.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:56.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 584.96ms\n", - "\u001b[32m2025-01-29 13:54:56.697\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:56.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:56.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 710.18ms\n", - "\u001b[32m2025-01-29 13:54:57.422\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:57.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:57.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.42ms\n", - "\u001b[32m2025-01-29 13:54:58.019\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:58.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:58.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:58.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile CUDA_C_Programming_Guide.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:58.028\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:58.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:58.759\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 715.90ms\n", - "\u001b[32m2025-01-29 13:54:58.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:58.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 736.45ms\n", - "\u001b[32m2025-01-29 13:54:59.509\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:59.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:54:59.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 684.31ms\n", - "\u001b[32m2025-01-29 13:55:00.210\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:00.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:00.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.52ms\n", - "\u001b[32m2025-01-29 13:55:00.815\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:00.816\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:00.818\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:01.414\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:01.415\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 585.33ms\n", - "\u001b[32m2025-01-29 13:55:01.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. 
-C: It is deleted after use to save space.\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 583.50ms\n", - "\u001b[32m2025-01-29 13:55:02.012\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:02.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:02.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:02.690\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:02.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 660.22ms\n", - "\u001b[32m2025-01-29 13:55:02.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 710.62ms\n", - "\u001b[32m2025-01-29 13:55:03.415\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:03.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:03.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. 
-C: Only when the graph is destroyed.\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 710.13ms\n", - "\u001b[32m2025-01-29 13:55:04.140\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:04.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:04.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:04.819\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:04.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:04.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 673.64ms\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.60ms\n", - "\u001b[32m2025-01-29 13:55:05.500\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:05.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:05.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 684.66ms\n", - "\u001b[32m2025-01-29 13:55:06.206\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:06.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:06.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:06.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile ?uri=OJ:L_202401689.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:06.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:06.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:06.893\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 664.22ms\n", - "\u001b[32m2025-01-29 13:55:06.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:06.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 608.95ms\n", - "\u001b[32m2025-01-29 13:55:07.517\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:07.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:07.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. 
-C: That there is no way to detect that the content is synthetic.\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.62ms\n", - "\u001b[32m2025-01-29 13:55:08.136\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:08.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:08.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 835.55ms\n", - "\u001b[32m2025-01-29 13:55:08.983\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:08.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:08.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:09.730\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 738.10ms\n", - "\u001b[32m2025-01-29 13:55:09.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:09.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. 
-B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:10.352\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 610.85ms\n", - "\u001b[32m2025-01-29 13:55:10.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:10.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 809.95ms\n", - "\u001b[32m2025-01-29 13:55:11.175\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:11.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:11.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? -A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.79ms\n", - "\u001b[32m2025-01-29 13:55:11.794\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:11.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:11.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? 
-A: 5 years -B: 10 years C: 15 years\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:12.465\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:12.467\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.49ms\n", - "\u001b[32m2025-01-29 13:55:12.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? -A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 835.45ms\n", - "\u001b[32m2025-01-29 13:55:13.310\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:13.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:13.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:13.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 7DUME_EN01_Rules.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:13.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:13.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 658.36ms\n", - "\u001b[32m2025-01-29 13:55:13.993\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:13.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:13.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.31ms\n", - "\u001b[32m2025-01-29 13:55:14.714\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:14.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:14.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 633.73ms\n", - "\u001b[32m2025-01-29 13:55:15.360\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:15.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:15.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:16.033\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.55ms\n", - "\u001b[32m2025-01-29 13:55:16.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:16.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:16.680\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:16.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:16.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 643.44ms\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 661.16ms\n", - "\u001b[32m2025-01-29 13:55:17.353\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:17.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:17.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 683.50ms\n", - "\u001b[32m2025-01-29 13:55:18.050\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:18.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:18.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.33ms\n", - "\u001b[32m2025-01-29 13:55:18.721\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:18.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:18.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 609.49ms\n", - "\u001b[32m2025-01-29 13:55:19.345\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:19.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:19.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.21ms\n", - "\u001b[32m2025-01-29 13:55:20.068\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:20.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:20.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 684.96ms\n", - "\u001b[32m2025-01-29 13:55:20.764\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:20.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:20.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.86ms\n", - "\u001b[32m2025-01-29 13:55:21.409\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:21.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:21.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.19ms\n", - "\u001b[32m2025-01-29 13:55:22.081\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:22.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:22.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.59ms\n", - "\u001b[32m2025-01-29 13:55:22.701\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:22.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:22.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.04ms\n", - "\u001b[32m2025-01-29 13:55:23.422\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:23.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:23.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:23.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile is_eotn_rulebook.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:23.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:23.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 709.39ms\n", - "\u001b[32m2025-01-29 13:55:24.159\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:24.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:24.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 614.42ms\n", - "\u001b[32m2025-01-29 13:55:24.783\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:24.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:24.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 659.58ms\n", - "\u001b[32m2025-01-29 13:55:25.454\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:25.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:25.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 860.42ms\n", - "\u001b[32m2025-01-29 13:55:26.327\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:26.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:26.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 683.94ms\n", - "\u001b[32m2025-01-29 13:55:27.024\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:27.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:27.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:27.696\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:27.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:27.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 669.67ms\n", - "\u001b[32m2025-01-29 13:55:28.420\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:28.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:28.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 721.91ms\n", - "\u001b[32m2025-01-29 13:55:29.116\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:29.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:29.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 687.25ms\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 683.55ms\n", - "\u001b[32m2025-01-29 13:55:29.813\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:29.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:29.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 607.97ms\n", - "\u001b[32m2025-01-29 13:55:30.434\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:30.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:30.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 632.87ms\n", - "\u001b[32m2025-01-29 13:55:31.077\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:31.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:31.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:31.734\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 644.82ms\n", - "\u001b[32m2025-01-29 13:55:31.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:31.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-01-29 13:55:32.540\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mstructured_qa.workflow\u001b[0m:\u001b[36mfind_retrieve_answer\u001b[0m:\u001b[36m83\u001b[0m - \u001b[31m\u001b[1mFailed to generate completion: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. 
check quota).\u001b[0m\n", - "WARNING:tornado.access:429 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 763.87ms\n", - "\u001b[32m2025-01-29 13:55:32.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1mAnswer: Generation Error\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "from pathlib import Path\n", "from urllib.request import urlretrieve\n", @@ -783,438 +293,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 424 + "height": 1000 }, "id": "3eW9TIKjHEgz", - "outputId": "186ae159-38de-4104-ca6d-c29d1172503f" + "outputId": "b82bcada-3c21-4d13-ee04-8b57eeb83c4c" }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 99,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28,\n \"min\": 0,\n \"max\": 98,\n \"num_unique_values\": 99,\n \"samples\": [\n 62,\n 40,\n 95\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 59,\n \"samples\": [\n \"3 Model Architecture\",\n \"5.2 Hardware and Schedule\",\n \"CARD AND TILE EFFECTS\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 98,\n \"samples\": [\n \"Can you raid the 
locations of a player that has passed during the action phase?\",\n \"Is symbolic reasoning usually simple for humans but challenging for language models?\",\n \"How many AI-related regulations were enacted in the United States in 2023?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 21,\n \"samples\": [\n \"C\",\n \"4\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"GENERATION ERROR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" - }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
00https://arxiv.org/pdf/1706.037623 Model ArchitectureWhat type of architecture does the model use? ...CGENERATION ERRORNaN
11https://arxiv.org/pdf/1706.037623.1 Encoder and Decoder StacksHow many layers compose the encoder?6GENERATION ERRORNaN
22https://arxiv.org/pdf/1706.037623.1 Encoder and Decoder StacksHow many layers compose the decoder?6GENERATION ERRORNaN
33https://arxiv.org/pdf/1706.037623.2.2 Multi-Head AttentionHow many parallel attention heads are used?8GENERATION ERRORNaN
44https://arxiv.org/pdf/1706.037623.4 Embeddings and SoftmaxDoes the final model use learned embeddings fo...YESGENERATION ERRORNaN
........................
9494https://aiindex.stanford.edu/wp-content/upload...LLM Tokenization Introduces UnfairnessWhat are the three major inequalities resultin...BGENERATION ERRORNaN
9595https://aiindex.stanford.edu/wp-content/upload...U.S. RegulationHow many AI-related regulations were enacted i...25GENERATION ERRORNaN
9696https://aiindex.stanford.edu/wp-content/upload...U.S. RegulationWhich of the following was identified as a hig...BGENERATION ERRORNaN
9797https://aiindex.stanford.edu/wp-content/upload...EuropeWhich country had the highest proportion of fe...BGENERATION ERRORNaN
9898https://aiindex.stanford.edu/wp-content/upload...EuropeWhich countries reported the smallest proporti...CGENERATION ERRORNaN
\n", - "

99 rows × 7 columns

\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " Unnamed: 0 document \\\n", - "0 0 https://arxiv.org/pdf/1706.03762 \n", - "1 1 https://arxiv.org/pdf/1706.03762 \n", - "2 2 https://arxiv.org/pdf/1706.03762 \n", - "3 3 https://arxiv.org/pdf/1706.03762 \n", - "4 4 https://arxiv.org/pdf/1706.03762 \n", - ".. ... ... \n", - "94 94 https://aiindex.stanford.edu/wp-content/upload... \n", - "95 95 https://aiindex.stanford.edu/wp-content/upload... \n", - "96 96 https://aiindex.stanford.edu/wp-content/upload... \n", - "97 97 https://aiindex.stanford.edu/wp-content/upload... \n", - "98 98 https://aiindex.stanford.edu/wp-content/upload... \n", - "\n", - " section \\\n", - "0 3 Model Architecture \n", - "1 3.1 Encoder and Decoder Stacks \n", - "2 3.1 Encoder and Decoder Stacks \n", - "3 3.2.2 Multi-Head Attention \n", - "4 3.4 Embeddings and Softmax \n", - ".. ... \n", - "94 LLM Tokenization Introduces Unfairness \n", - "95 U.S. Regulation \n", - "96 U.S. Regulation \n", - "97 Europe \n", - "98 Europe \n", - "\n", - " question answer \\\n", - "0 What type of architecture does the model use? ... C \n", - "1 How many layers compose the encoder? 6 \n", - "2 How many layers compose the decoder? 6 \n", - "3 How many parallel attention heads are used? 8 \n", - "4 Does the final model use learned embeddings fo... YES \n", - ".. ... ... \n", - "94 What are the three major inequalities resultin... B \n", - "95 How many AI-related regulations were enacted i... 25 \n", - "96 Which of the following was identified as a hig... B \n", - "97 Which country had the highest proportion of fe... B \n", - "98 Which countries reported the smallest proporti... C \n", - "\n", - " pred_answer pred_section \n", - "0 GENERATION ERROR NaN \n", - "1 GENERATION ERROR NaN \n", - "2 GENERATION ERROR NaN \n", - "3 GENERATION ERROR NaN \n", - "4 GENERATION ERROR NaN \n", - ".. ... ... 
\n", - "94 GENERATION ERROR NaN \n", - "95 GENERATION ERROR NaN \n", - "96 GENERATION ERROR NaN \n", - "97 GENERATION ERROR NaN \n", - "98 GENERATION ERROR NaN \n", - "\n", - "[99 rows x 7 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "results = pd.read_csv(\"results.csv\")\n", "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" @@ -1222,26 +310,15 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AhenESELHEgz", - "outputId": "4b5d7785-4d17-4c78-b0f8-d69fa50bad15" + "outputId": "7e08929e-1342-4705-b1b4-e8cac81c35a6" }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.0" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", "accuracy" @@ -1249,17 +326,364 @@ } ], "metadata": { + "accelerator": "GPU", "colab": { + "gpuType": "T4", "provenance": [] }, "kernelspec": { - "display_name": ".venv", - "language": "python", + "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python", "version": "3.10.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "082c72195b2b4a13b0ca7cecf368cebf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_96da7d2d05834aa98fe41ad43e5c2c62", + "placeholder": "​", + "style": "IPY_MODEL_e2ae5284ec9349a4a920585c12419e33", + "value": " 
8.10G/8.10G [05:55<00:00, 22.8MB/s]" + } + }, + "575b8c7136f846c9ab62e7a796350506": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5d98e9f3ef544194b6a99d94c1b56cd2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "79ed82485b234525976e17fc9ebe47de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c35c62230dfe4c32b2c45632e691cd46", + "IPY_MODEL_cfeae74e2e3f44e9b2e2dbbb9c756bd8", + "IPY_MODEL_082c72195b2b4a13b0ca7cecf368cebf" + ], + "layout": "IPY_MODEL_b09e7d3ff8cd498aa45faca3b390ac70" + } + }, + "83e32e91aff04efda8ca1efad07249cb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "96da7d2d05834aa98fe41ad43e5c2c62": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b09e7d3ff8cd498aa45faca3b390ac70": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c35c62230dfe4c32b2c45632e691cd46": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5d98e9f3ef544194b6a99d94c1b56cd2", + 
"placeholder": "​", + "style": "IPY_MODEL_83e32e91aff04efda8ca1efad07249cb", + "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" + } + }, + "cfeae74e2e3f44e9b2e2dbbb9c756bd8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d320191f9d7146f8b8ccd10e7ad6dd6d", + "max": 8098525888, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_575b8c7136f846c9ab62e7a796350506", + "value": 8098525888 + } + }, + "d320191f9d7146f8b8ccd10e7ad6dd6d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": 
null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e2ae5284ec9349a4a920585c12419e33": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } } }, "nbformat": 4, From 2de0bfb3a93d71bfc5b5ae996aa04338c6d9308c Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 3 Feb 2025 12:55:07 +0100 Subject: [PATCH 099/120] Add max_sections_to_check --- src/structured_qa/workflow.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index a51c225..0b2327c 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -17,6 +17,7 @@ def find_retrieve_answer( sections_dir: str, find_prompt: str, answer_prompt: str, + max_sections_to_check: int = 10, ) -> tuple[str, list[str]] | tuple[None, list[str]]: """ Workflow to find the relevant section, retrieve the information, and answer the question. @@ -40,7 +41,7 @@ def find_retrieve_answer( answer_prompt (str): The prompt for answering the question. See [`ANSWER_PROMPT`][structured_qa.config.ANSWER_PROMPT]. - + max_sections_to_check (int, optional): The maximum number of sections to check before giving up. 
Returns: tuple[str, list[str]] | tuple[None, list[str]]: @@ -53,7 +54,7 @@ def find_retrieve_answer( current_section = None sections_checked = [] - while sections_names: + while len(sections_checked) < max_sections_to_check: logger.debug(f"Current information available: {current_info}") if not current_info: logger.debug("Finding section") From 8f7d173a05bbde6d69161ea2da9dcdb32a53d3bb Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 3 Feb 2025 14:44:44 +0100 Subject: [PATCH 100/120] Default to None --- src/structured_qa/workflow.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 0b2327c..35928c3 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -17,7 +17,7 @@ def find_retrieve_answer( sections_dir: str, find_prompt: str, answer_prompt: str, - max_sections_to_check: int = 10, + max_sections_to_check: int | None = None, ) -> tuple[str, list[str]] | tuple[None, list[str]]: """ Workflow to find the relevant section, retrieve the information, and answer the question. @@ -42,6 +42,9 @@ def find_retrieve_answer( See [`ANSWER_PROMPT`][structured_qa.config.ANSWER_PROMPT]. max_sections_to_check (int, optional): The maximum number of sections to check before giving up. + Defaults to None. + If None, it will check all sections until it finds the answer. 
+ Returns: tuple[str, list[str]] | tuple[None, list[str]]: @@ -53,6 +56,9 @@ def find_retrieve_answer( current_info = None current_section = None + if max_sections_to_check is None: + max_sections_to_check = len(sections_names) + sections_checked = [] while len(sections_checked) < max_sections_to_check: logger.debug(f"Current information available: {current_info}") From 7ff95ff7b583085170cd96432e03a953427ef666 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 3 Feb 2025 14:47:09 +0100 Subject: [PATCH 101/120] Default to half of sections --- src/structured_qa/workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 35928c3..8806143 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -43,7 +43,7 @@ def find_retrieve_answer( See [`ANSWER_PROMPT`][structured_qa.config.ANSWER_PROMPT]. max_sections_to_check (int, optional): The maximum number of sections to check before giving up. Defaults to None. - If None, it will check all sections until it finds the answer. + If None, it will check half of the sections until it finds the answer. 
Returns: tuple[str, list[str]] | tuple[None, list[str]]: @@ -57,7 +57,7 @@ def find_retrieve_answer( current_section = None if max_sections_to_check is None: - max_sections_to_check = len(sections_names) + max_sections_to_check = len(sections_names) // 2 sections_checked = [] while len(sections_checked) < max_sections_to_check: From d05d9921a2436bd104eee53aa9be1079f32c7691 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 3 Feb 2025 14:48:12 +0100 Subject: [PATCH 102/120] Update --- src/structured_qa/workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 8806143..6a768b4 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -43,7 +43,7 @@ def find_retrieve_answer( See [`ANSWER_PROMPT`][structured_qa.config.ANSWER_PROMPT]. max_sections_to_check (int, optional): The maximum number of sections to check before giving up. Defaults to None. - If None, it will check half of the sections until it finds the answer. + If None, it will check up to a maximum of 20 sections until it finds the answer. 
Returns: tuple[str, list[str]] | tuple[None, list[str]]: @@ -57,7 +57,7 @@ def find_retrieve_answer( current_section = None if max_sections_to_check is None: - max_sections_to_check = len(sections_names) // 2 + max_sections_to_check = min(20, len(sections_names)) sections_checked = [] while len(sections_checked) < max_sections_to_check: From db63dc94e8a8da0b7cbabe736620145c41a9b1ae Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 3 Feb 2025 14:52:37 +0100 Subject: [PATCH 103/120] fix --- .../{2.1 Pre-training Data => 2.1 Pre-training Data.txt} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename benchmark/perfect_context/{2.1 Pre-training Data => 2.1 Pre-training Data.txt} (100%) diff --git a/benchmark/perfect_context/2.1 Pre-training Data b/benchmark/perfect_context/2.1 Pre-training Data.txt similarity index 100% rename from benchmark/perfect_context/2.1 Pre-training Data rename to benchmark/perfect_context/2.1 Pre-training Data.txt From 20f9e3f0fac71be9b7a25e069cb534b602c7e924 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 3 Feb 2025 14:57:30 +0100 Subject: [PATCH 104/120] Fix --- benchmark/perfect_context/{2.3 Optimizer => 2.3 Optimizer.txt} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename benchmark/perfect_context/{2.3 Optimizer => 2.3 Optimizer.txt} (100%) diff --git a/benchmark/perfect_context/2.3 Optimizer b/benchmark/perfect_context/2.3 Optimizer.txt similarity index 100% rename from benchmark/perfect_context/2.3 Optimizer rename to benchmark/perfect_context/2.3 Optimizer.txt From c5ee8e63ab951b740147be2d69c2f00549043734 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 3 Feb 2025 15:15:35 +0100 Subject: [PATCH 105/120] Add qwen full context --- benchmark/gemini_perfect_context.ipynb | 2403 +++++++++++----------- benchmark/qwen_2_5_7B_full_context.ipynb | 692 +++++++ 2 files changed, 1913 insertions(+), 1182 deletions(-) create mode 100644 benchmark/qwen_2_5_7B_full_context.ipynb diff --git a/benchmark/gemini_perfect_context.ipynb 
b/benchmark/gemini_perfect_context.ipynb index dbf1f67..c11484e 100644 --- a/benchmark/gemini_perfect_context.ipynb +++ b/benchmark/gemini_perfect_context.ipynb @@ -1,1199 +1,1238 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "oD2lVadPlyhR" - }, - "source": [ - "# Structured Q&A" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JZeb4ABvlyhS" - }, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "77aI1i7vlyhS" - }, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "atlPXFshlyhS" - }, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oD2lVadPlyhR" + }, + "source": [ + "# Structured Q&A" + ] }, - "id": "QrgOGtuGlyhT", - "outputId": "e5fcc547-6580-4068-d844-28afc1c28d9c" - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cloning into 'structured-qa'...\n", - "remote: Enumerating objects: 724, done.\u001b[K\n", - "remote: Counting objects: 100% (162/162), done.\u001b[K\n", - "remote: Compressing objects: 100% (101/101), done.\u001b[K\n", - "remote: Total 724 (delta 100), reused 74 (delta 61), pack-reused 562 (from 1)\u001b[K\n", - "Receiving objects: 100% (724/724), 2.23 MiB | 6.64 MiB/s, done.\n", - "Resolving deltas: 100% (382/382), done.\n" - ] - } - ], - "source": [ - "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "markdown", + "metadata": { + "id": "JZeb4ABvlyhS" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" 
+ ] }, - "id": "S22kTrfPlyhU", - "outputId": "40005814-676a-4abd-8d00-3b86d1e3a3f0" - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing ./structured-qa\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.27.1)\n", - "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.7.3)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (2.10.5)\n", - "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.0.17)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (6.0.2)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (1.41.1)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev71+gae325d3) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (2.27.2)\n", - "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev71+gae325d3) (1.25.2)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.25.5)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.10.2)\n", - "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.0.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.44)\n", - "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.9.1)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.3.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from 
altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.23.0)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from 
rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.0.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (24.3.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.36.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.17.0)\n", - "Building wheels for collected packages: structured-qa\n", - " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev71+gae325d3-py3-none-any.whl size=16241 sha256=2defc7ec99afa5814e6713aee2aca5a6364f8b2a2b97e206aa16ccabf91113eb\n", - " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", - "Successfully built structured-qa\n", - "Installing collected packages: structured-qa\n", - " Attempting uninstall: structured-qa\n", - " Found existing installation: structured-qa 0.3.3.dev71+gae325d3\n", - " Uninstalling structured-qa-0.3.3.dev71+gae325d3:\n", - " Successfully uninstalled structured-qa-0.3.3.dev71+gae325d3\n", - "Successfully installed structured-qa-0.3.3.dev71+gae325d3\n" - ] - } - ], - "source": [ - "%pip install ./structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qwHWJEsulyhV" - }, - "source": [ - "# Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "iJ812u2llyhV" - }, - "outputs": [], - "source": [ - "import os\n", - "import google.generativeai as genai\n", - "from google.colab.userdata import get, SecretNotFoundError\n", - "\n", - "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", - "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "jWlaKC5qXZrh" - }, - "outputs": [], - "source": [ - "from loguru import logger" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MKijHC_ClyhX" - }, - "source": [ - "## Function to Process all questions for a single Section" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "oFU-eYMVlyhX" - }, - "outputs": [], - "source": [ - "import json\n", - "import time\n", - "\n", - "\n", - "def process_section_questions(\n", - " section_file,\n", - " 
section_data,\n", - " model,\n", - "):\n", - " logger.info(\"Predicting\")\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in section_data.iterrows():\n", - " if model.n > 0 and model.n % 10 == 0:\n", - " logger.info(\"Waiting for 60 seconds\")\n", - " time.sleep(60)\n", - " question = row[\"question\"]\n", - " logger.info(f\"Question: {question}\")\n", - " response = model.model.generate_content([section_file.read_text(), question])\n", - " logger.info(response.text)\n", - " response_json = json.loads(response.text)\n", - " answers[index] = response_json[\"answer\"]\n", - " sections[index] = None\n", - " model.n += 1\n", - " return answers, sections" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VQAof5xtlyhY" - }, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "6RoEbYj3XZri" - }, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_gemini_model" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "whtSJwdrlyhZ" - }, - "outputs": [], - "source": [ - "SYSTEM_PROMPT = \"\"\"\n", - "You are given an input document and a question.\n", - "You can only answer the question based on the information in the document.\n", - "You will return a JSON name with one key: \"answer\".\n", - "In `\"answer\"`, you will return the answer in one of the following JSON contents:\n", - "- Yes/No (for boolean questions)\n", - "Is the model an LLM?\n", - "{\n", - " \"answer\": \"No\"\n", - "}\n", - "- Single number (for numeric questions)\n", - "How many layers does the model have?\n", - "{\n", - " \"answer\": 12\n", - "}\n", - "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", - "{\n", - " \"answer\": \"C\"\n", - "}\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "ObsvwlNslyhZ" - }, - "outputs": [], - "source": [ - "model = load_gemini_model(\n", - " \"gemini-2.0-flash-exp\",\n", - " system_prompt=SYSTEM_PROMPT,\n", - " generation_config={\n", - " \"response_mime_type\": \"application/json\",\n", - " },\n", - ")\n", - "model.n = 0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W97jWzzOlyhZ" - }, - "source": [ - "# Run Benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 + "cell_type": "markdown", + "metadata": { + "id": "77aI1i7vlyhS" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] }, - "id": "AZBwRnfjlyhZ", - "outputId": "e8e05071-0cad-41b5-c3f5-a0437357270d" - }, - "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-01-28 14:06:32.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:32.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:32.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:34.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 175\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:34.792\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:36.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:36.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:36.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:38.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:38.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:38.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. 
-C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:40.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:40.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:41.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:41.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:43.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:43.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:44.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:44.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:44.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:46.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:46.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:46.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:48.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:48.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:48.515\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:50.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:50.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:50.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:52.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:52.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:52.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:53.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - 
\u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:53.671\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:53.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:55.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:55.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:56.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:56.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:58.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 14\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:58.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - 
"\u001b[32m2025-01-28 14:08:00.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:00.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:01.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:01.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:01.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:03.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:03.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:03.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:04.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:04.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:06.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:06.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:06.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:06.202\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:07.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:07.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:08.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:08.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:08.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:10.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 100\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:10.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:10.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How 
many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:12.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:12.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:12.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:14.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:14.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:14.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:15.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 2\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:15.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:15.829\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:17.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:17.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:17.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:20.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:20.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:20.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:21.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:21.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:21.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:23.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:23.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:23.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:24.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:24.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:26.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:26.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:26.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:27.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:30.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:30.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:30.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:31.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 1024\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:31.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:33.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:33.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:33.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:34.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:34.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:36.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 4000\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:36.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:36.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:38.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 0.1\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:38.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:38.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:40.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:40.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:40.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:42.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:42.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:44.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:44.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:45.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:45.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:45.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:47.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:47.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the 
final round?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:49.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": 25\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:49.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:49.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:51.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:51.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:51.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:52.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:52.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the 
reserve?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:55.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 2\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:55.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:55.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:56.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:56.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:58.853\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:58.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:12:58.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:02.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:02.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:03.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:03.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:03.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:05.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:05.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:05.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:07.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:07.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:09.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:09.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:10.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:10.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:12.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:12.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:12.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:14.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:14.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:14.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:16.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:16.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:16.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:17.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:17.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:13:17.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:17.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:19.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:19.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:19.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? 
-A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:20.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:20.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:20.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? -A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:22.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:22.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:22.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:24.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:24.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In how many regions do you need to 
be present to win the game?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:25.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 7\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:25.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:25.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:27.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:27.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:27.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:32.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:32.496\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:34.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:34.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:36.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 1\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:36.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:36.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? 
-A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:38.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:38.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:38.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:38.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:40.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:40.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:41.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:41.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:41.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:43.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:43.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:46.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:46.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:46.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:47.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:47.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:47.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:50.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:50.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:54.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:54.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:54.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:56.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 1\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:56.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you 
need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:01.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:01.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:01.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:02.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 4\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:02.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:02.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:04.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:04.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - 
"\u001b[32m2025-01-28 14:17:04.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:06.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:06.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:06.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:11.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:11.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:11.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:14.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:14.583\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:16.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:16.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:16.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? -A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:18.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:18.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:18.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:22.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - 
\u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:22.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:22.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:27.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:27.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:32.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:32.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:34.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:34.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:34.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:34.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:36.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:36.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:39.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:39.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 
14:18:39.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:40.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:40.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:40.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:42.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:42.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:45.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:45.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:45.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. -C: That there is no way to detect that the content is synthetic.\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:47.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:47.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:47.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:49.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 25\n", - "}\u001b[0m\n", - 
"\u001b[32m2025-01-28 14:18:49.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:50.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:50.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:50.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:52.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"2015\"\n", - "}\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "\n", - "import pandas as pd\n", - "\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for section_name, section_data in data.groupby(\"section\"):\n", - " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", - "\n", - " answers, sections = 
process_section_questions(section_file, section_data, model)\n", - "\n", - " for index in section_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a9eMkW1-lyha" - }, - "source": [ - "# Results" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 112 + "cell_type": "markdown", + "metadata": { + "id": "atlPXFshlyhS" + }, + "source": [ + "## Installing dependencies" + ] }, - "id": "EYYJgWf6lyha", - "outputId": "720ca56f-eb28-4770-9ec6-8959e77d27d9" - }, - "outputs": [ { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 43,\n \"max\": 52,\n \"num_unique_values\": 2,\n \"samples\": [\n 52,\n 43\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"CARD AND TILE COSTS\",\n \"3.4 Robustness of Chain of Thought\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\",\n \"How many annotators provided independent chains of 
thought?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"YES\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NO\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QrgOGtuGlyhT", + "outputId": "fbafc71f-3eca-4148-d567-308268381c6f" }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
4343https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32NaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'structured-qa'...\n", + "remote: Enumerating objects: 893, done.\u001b[K\n", + "remote: Counting objects: 100% (331/331), done.\u001b[K\n", + "remote: Compressing objects: 100% (192/192), done.\u001b[K\n", + "remote: Total 893 (delta 211), reused 195 (delta 126), pack-reused 562 (from 1)\u001b[K\n", + "Receiving objects: 100% (893/893), 2.42 MiB | 12.01 MiB/s, done.\n", + "Resolving deltas: 100% (493/493), done.\n" + ] + } ], - "text/plain": [ - " Unnamed: 0 document \\\n", - "43 43 https://arxiv.org/pdf/2201.11903 \n", - "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", - "\n", - " section \\\n", - "43 3.4 Robustness of Chain of Thought \n", - "52 CARD AND TILE COSTS \n", - "\n", - " question answer pred_answer \\\n", - "43 How many annotators provided independent chain... 3 2 \n", - "52 Can a player pay coins to compensate for missi... YES NO \n", - "\n", - " pred_section \n", - "43 NaN \n", - "52 NaN " + "source": [ + "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "wfz1XQDLlyha", - "outputId": "6417fafc-ffd5-46eb-ab92-8a2a905c4f71" - }, - "outputs": [ { - "data": { - "text/plain": [ - "0.9797979797979798" + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S22kTrfPlyhU", + "outputId": "22d372c0-9182-47af-a1cb-11da751f86b5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Processing ./structured-qa\n", + " Installing build dependencies ... 
\u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting fire (from structured-qa==0.3.3.dev104+g20f9e3f)\n", + " Downloading fire-0.7.0.tar.gz (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev104+g20f9e3f) (0.27.1)\n", + "Collecting loguru (from structured-qa==0.3.3.dev104+g20f9e3f)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev104+g20f9e3f) (2.10.6)\n", + "Collecting pymupdf4llm (from structured-qa==0.3.3.dev104+g20f9e3f)\n", + " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev104+g20f9e3f) (6.0.2)\n", + "Collecting rapidfuzz (from structured-qa==0.3.3.dev104+g20f9e3f)\n", + " Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", + "Collecting streamlit (from structured-qa==0.3.3.dev104+g20f9e3f)\n", + " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev104+g20f9e3f) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages 
(from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev104+g20f9e3f) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev104+g20f9e3f) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev104+g20f9e3f)\n", + " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f)\n", + " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f)\n", + " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (6.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (1.24.1)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from 
requests->huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (25.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from 
python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (1.17.0)\n", + "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", + "Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m29.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m55.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m53.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m37.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", + " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev104+g20f9e3f-py3-none-any.whl size=13245 sha256=852eab90b4d55da99708db4c4a22243dfaa2d126838195140457d8926c7aeb9e\n", + " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", + " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=ab54f7d454f84637ecb160a1e724611ea9da5404833ae692888ff83db75821c3\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + "Successfully built structured-qa fire\n", + "Installing collected packages: watchdog, rapidfuzz, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", + "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 rapidfuzz-3.12.1 streamlit-1.41.1 structured-qa-0.3.3.dev104+g20f9e3f watchdog-6.0.0\n" + ] + } + ], + "source": [ + "%pip install ./structured-qa" ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "from google.colab.userdata import get, SecretNotFoundError\n", + "\n", + "try:\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + "except SecretNotFoundError as e:\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "jWlaKC5qXZrh" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process all questions for a single Section" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + "import json\n", + "import time\n", + "\n", + "\n", + "def process_section_questions(\n", + " section_file,\n", + " section_data,\n", + " model,\n", + "):\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in section_data.iterrows():\n", + " if model.n > 0 and model.n % 10 == 0:\n", + " logger.info(\"Waiting for 60 seconds\")\n", + " time.sleep(60)\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " response = model.model.generate_content([section_file.read_text(), question])\n", + " logger.info(response.text)\n", + " response_json = json.loads(response.text)\n", + " answers[index] = response_json[\"answer\"]\n", + " sections[index] = None\n", + " model.n += 1\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "6RoEbYj3XZri" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "whtSJwdrlyhZ" + }, + "outputs": [], + "source": [ + "SYSTEM_PROMPT = \"\"\"\n", + "You are given an input document and a question.\n", + "You can only answer the question based on the information in the document.\n", + "You will return a JSON name with one key: \"answer\".\n", + "In `\"answer\"`, you will return the answer in one of the following JSON contents:\n", + "- Yes/No (for boolean questions)\n", + "Is the model an LLM?\n", + "{\n", + " \"answer\": \"No\"\n", + "}\n", + "- Single number (for numeric questions)\n", + "How many layers does 
the model have?\n", + "{\n", + " \"answer\": 12\n", + "}\n", + "- Single letter (for multiple-choice questions)\n", + "What is the activation function used in the model? -A: ReLU -B: Sigmoid -C: Tanh\n", + "{\n", + " \"answer\": \"C\"\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "ObsvwlNslyhZ" + }, + "outputs": [], + "source": [ + "model = load_gemini_model(\n", + " \"gemini-2.0-flash-exp\",\n", + " system_prompt=SYSTEM_PROMPT,\n", + " generation_config={\n", + " \"response_mime_type\": \"application/json\",\n", + " },\n", + ")\n", + "model.n = 0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W97jWzzOlyhZ" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "AZBwRnfjlyhZ", + "outputId": "6e4083db-4a64-49be-986e-d39ff4963e2b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[32m2025-02-03 13:58:31.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:31.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:31.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:33.038\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 175\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:33.045\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:34.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:34.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:34.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:36.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:36.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:36.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. 
-C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:37.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:37.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:38.882\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:38.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:40.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:40.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:41.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:41.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:41.753\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? -A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:43.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:43.022\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:44.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 20\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:44.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:44.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? 
-A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:45.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:45.363\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 13:58:45.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:45.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:47.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:47.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:49.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"0.1\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:49.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:49.170\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:50.671\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:50.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:50.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:52.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:52.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:53.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:53.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:53.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:55.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:55.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:55.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:56.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:56.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:58.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:58.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + 
"\u001b[32m2025-02-03 13:59:59.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 14\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 13:59:59.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-02-03 14:00:00.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:00:00.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:00.853\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:03.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:03.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:03.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:04.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": 20\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:04.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:06.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:06.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:06.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:08.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:08.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:08.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:09.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:09.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:10.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:10.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:10.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:11.974\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:11.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:13.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 5\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:13.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:13.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:15.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": 100\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:15.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:15.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:16.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " 
\"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:16.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:01:16.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:16.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:17.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:17.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:17.726\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:19.051\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 2\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:19.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:19.056\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:20.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:20.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:20.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:21.809\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:21.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:21.816\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:22.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:22.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:22.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:24.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:24.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:26.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:26.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:26.584\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + 
"\u001b[32m2025-02-03 14:02:27.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:27.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:30.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:30.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:31.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:31.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:02:31.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:31.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:33.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:33.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:33.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:34.539\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 8\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:34.540\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:35.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:35.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:35.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:37.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 1024\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:37.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:38.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:38.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:38.425\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:39.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:39.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:40.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 4000\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:40.876\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:40.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:42.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 0.1\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:42.053\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:42.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In the offline compilation 
process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:43.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:43.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:44.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:03:44.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:44.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:45.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:45.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:47.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:47.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:47.215\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:48.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:48.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the 
final round?\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:50.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:50.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:50.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:51.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:51.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:53.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:53.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:53.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:55.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:55.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:55.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:56.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:56.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If 
a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:57.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 2\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:57.710\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:57.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:59.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 6\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:04:59.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 14:05:59.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:00.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:00.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:03.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:03.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:04.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:04.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:04.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:06.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:06.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:06.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:08.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:08.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:09.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:10.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:12.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:12.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:14.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:14.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:14.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:17.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:17.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:17.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:19.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:19.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:06:19.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:19.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:20.408\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:20.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:21.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 7\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:21.537\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:21.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:23.267\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:23.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:24.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:24.719\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:26.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 1\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:26.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:26.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:27.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:27.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:28.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + "\"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:28.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:28.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:29.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:29.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:30.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:30.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:30.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:32.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:32.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:07:32.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:32.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:33.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:33.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:35.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:35.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:35.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:40.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"1\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:40.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do 
you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:41.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:41.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:41.861\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:43.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 4\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:43.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:44.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:44.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:44.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU 
memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:46.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:46.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:46.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:47.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:47.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:49.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:49.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:49.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are publication fees required for 
all open access journals?\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:50.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:50.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:08:50.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:50.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:52.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:52.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:55.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 3\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:55.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:55.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - 
\u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? -A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:56.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:56.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:56.663\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:57.886\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:57.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:57.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. 
-C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:59.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:09:59.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:00.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:00.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:00.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:02.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:02.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:04.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:04.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:04.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. 
-C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:06.512\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:06.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:07.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:07.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:10:07.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 14:11:07.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-02-03 14:11:09.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": 25\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 
14:11:09.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-02-03 14:11:11.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:11:11.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:11:11.012\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-02-03 14:11:14.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", + " \"answer\": \"2015\"\n", + "}\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for section_name, section_data in data.groupby(\"section\"):\n", + " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", + "\n", + " answers, sections = 
process_section_questions(section_file, section_data, model)\n", + "\n", + " for index in section_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "id": "EYYJgWf6lyha", + "outputId": "99160292-3796-4226-94b2-56fae4d048e6" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 document \\\n", + "33 33 https://arxiv.org/pdf/2201.11903 \n", + "42 42 https://github.com/mozilla-ai/structured-qa/re... \n", + "\n", + " section \\\n", + "33 3.4 Robustness of Chain of Thought \n", + "42 CARD AND TILE COSTS \n", + "\n", + " question answer pred_answer \\\n", + "33 How many annotators provided independent chain... 3 2 \n", + "42 Can a player pay coins to compensate for missi... YES NO \n", + "\n", + " pred_section \n", + "33 NaN \n", + "42 NaN " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
3333https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32NaN
4242https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"results\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 33,\n \"max\": 42,\n \"num_unique_values\": 2,\n \"samples\": [\n 42,\n 33\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"CARD AND TILE COSTS\",\n \"3.4 Robustness of Chain of Thought\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\",\n \"How many annotators provided independent chains of thought?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"YES\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NO\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + 
"metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wfz1XQDLlyha", + "outputId": "370ef8ef-08f5-45c0-f671-5740f4975284" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9805825242718447" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "rjMNQp8-sZn9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" } - ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rjMNQp8-sZn9" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" }, - "language_info": { - "name": "python", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/benchmark/qwen_2_5_7B_full_context.ipynb b/benchmark/qwen_2_5_7B_full_context.ipynb new file mode 100644 index 0000000..901817b --- /dev/null +++ b/benchmark/qwen_2_5_7B_full_context.ipynb @@ -0,0 +1,692 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "9RKWbX7BHEgr" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "id": "PYuloevCHEgu" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pgYAsUQWHEgv" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EbFAX4heHEgv" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tk9uiWGn81j_", + "outputId": "b75d8f37-168a-4ca7-dd23-121051d6c0fb" + }, + "outputs": [], + "source": [ + "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2HoyF-xbHEgv", + "outputId": "a44a894c-1079-470f-d323-578d05238768" + }, + "outputs": [], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "lJs7zN4N8vhO" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p_hsSGafHEgw", + "outputId": "03e8238b-d5f4-4ced-99e7-041a4263b92d" + }, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0MDfM6cyHEgx" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "5bLJE4U7HEgx" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": 
"code", + "execution_count": 5, + "metadata": { + "id": "y3yUsRDWHEgy" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AgpODLeJHEgy" + }, + "source": [ + "## Function to Process a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "n6d8F7cYHEgy" + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "\n", + "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You must only answer based on the current information available which is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "If the current information available not enough to answer the question,\n", + "you must return \"I need more info\" srting and nothing else:\n", + "\n", + "If the current information is enough to answer, you must return one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "- Number (for numeric questions)\n", + "- Single letter (for multiple-choice questions)\n", + "\"\"\"\n", + "\n", + "\n", + "def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + "):\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " messages = [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": ANSWER_WITH_TYPE_PROMPT.format(\n", + " CURRENT_INFO=Path(document_file).read_text()\n", + " ),\n", + " },\n", + " {\"role\": \"user\", \"content\": question},\n", + " ]\n", + " try:\n", + " answer = model.get_response(messages)\n", + " except Exception as e:\n", + " answer = \"Out of context\"\n", + " logger.info(f\"Answer: {answer}\")\n", + " answers[index] = answer\n", + " sections[index] = None\n", + "\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", 
+ "metadata": { + "id": "GdlWjANdHEgz" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "9zx8nCaZHEgz" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_llama_cpp_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 156, + "referenced_widgets": [ + "79ed82485b234525976e17fc9ebe47de", + "c35c62230dfe4c32b2c45632e691cd46", + "cfeae74e2e3f44e9b2e2dbbb9c756bd8", + "082c72195b2b4a13b0ca7cecf368cebf", + "b09e7d3ff8cd498aa45faca3b390ac70", + "5d98e9f3ef544194b6a99d94c1b56cd2", + "83e32e91aff04efda8ca1efad07249cb", + "d320191f9d7146f8b8ccd10e7ad6dd6d", + "575b8c7136f846c9ab62e7a796350506", + "96da7d2d05834aa98fe41ad43e5c2c62", + "e2ae5284ec9349a4a920585c12419e33" + ] + }, + "id": "U4R84hHRHEgz", + "outputId": "3c083d53-79db-4f8c-ef1a-a12754cb227f" + }, + "outputs": [], + "source": [ + "model = load_llama_cpp_model(\n", + " \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BEzqJJ1yHEgz" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-qtPf9RmHEgz", + "outputId": "723ac0e9-23c4-470f-df81-1ec4065df532" + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = 
Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "3eW9TIKjHEgz", + "outputId": "b82bcada-3c21-4d13-ee04-8b57eeb83c4c" + }, + "outputs": [], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AhenESELHEgz", + "outputId": "7e08929e-1342-4705-b1b4-e8cac81c35a6" + }, + "outputs": [], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "082c72195b2b4a13b0ca7cecf368cebf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_96da7d2d05834aa98fe41ad43e5c2c62", + "placeholder": "​", + "style": "IPY_MODEL_e2ae5284ec9349a4a920585c12419e33", + "value": " 8.10G/8.10G [05:55<00:00, 22.8MB/s]" + } + }, + "575b8c7136f846c9ab62e7a796350506": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5d98e9f3ef544194b6a99d94c1b56cd2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "79ed82485b234525976e17fc9ebe47de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c35c62230dfe4c32b2c45632e691cd46", + "IPY_MODEL_cfeae74e2e3f44e9b2e2dbbb9c756bd8", + "IPY_MODEL_082c72195b2b4a13b0ca7cecf368cebf" + ], + "layout": "IPY_MODEL_b09e7d3ff8cd498aa45faca3b390ac70" + } + }, + "83e32e91aff04efda8ca1efad07249cb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "96da7d2d05834aa98fe41ad43e5c2c62": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b09e7d3ff8cd498aa45faca3b390ac70": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c35c62230dfe4c32b2c45632e691cd46": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5d98e9f3ef544194b6a99d94c1b56cd2", + "placeholder": "​", + "style": "IPY_MODEL_83e32e91aff04efda8ca1efad07249cb", + "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" + } + }, + "cfeae74e2e3f44e9b2e2dbbb9c756bd8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d320191f9d7146f8b8ccd10e7ad6dd6d", + "max": 8098525888, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_575b8c7136f846c9ab62e7a796350506", + "value": 8098525888 + } + }, + "d320191f9d7146f8b8ccd10e7ad6dd6d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e2ae5284ec9349a4a920585c12419e33": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From a4da6493e8f946d10f232d0bd73ba0c1c2f44e08 Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 3 Feb 2025 15:49:20 +0100 Subject: [PATCH 106/120] Update qwen_full_context --- benchmark/qwen_2_5_7B_full_context.ipynb | 1778 +++++++++++++++++----- 1 file changed, 1386 insertions(+), 392 deletions(-) diff --git a/benchmark/qwen_2_5_7B_full_context.ipynb b/benchmark/qwen_2_5_7B_full_context.ipynb index 901817b..3c4beea 100644 --- a/benchmark/qwen_2_5_7B_full_context.ipynb +++ b/benchmark/qwen_2_5_7B_full_context.ipynb @@ -38,13 +38,9 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "tk9uiWGn81j_", - "outputId": "b75d8f37-168a-4ca7-dd23-121051d6c0fb" + "id": "tk9uiWGn81j_" }, "outputs": [], "source": [ @@ -53,22 +49,117 @@ }, { "cell_type": "code", - "execution_count": null, + "source": [ + "%pip install PyPDF2" + ], + "metadata": { + "id": "uBJnKqs_MqBV", + "outputId": "b2de9597-0143-4547-be78-fa56999806f3", + "colab": { + "base_uri": 
"https://localhost:8080/" + } + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: PyPDF2 in /usr/local/lib/python3.11/dist-packages (3.0.1)\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2HoyF-xbHEgv", - "outputId": "a44a894c-1079-470f-d323-578d05238768" + "outputId": "a509f236-08be-4371-d40d-aed1f49beed3" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-g4ugf7tj\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-g4ugf7tj\n", + " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", + " Switched to a new branch '5-add-benchmark'\n", + " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit c5ee8e63ab951b740147be2d69c2f00549043734\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.27.1)\n", + "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (2.10.6)\n", + "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.0.17)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (6.0.2)\n", + "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (3.12.1)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (1.41.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev105+gc5ee8e6) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2.32.3)\n", + "Requirement 
already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev105+gc5ee8e6) (2.27.2)\n", + "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev105+gc5ee8e6) (1.25.2)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (11.1.0)\n", + "Requirement already satisfied: 
protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.10.2)\n", + "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (6.0.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.1.44)\n", + "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.9.1)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (6.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.24.1)\n", + "Requirement already satisfied: 
gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.0.2)\n", + 
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (25.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.17.0)\n" + ] + } + ], "source": [ "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "id": "lJs7zN4N8vhO" }, @@ -77,15 +168,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "p_hsSGafHEgw", - "outputId": "03e8238b-d5f4-4ced-99e7-041a4263b92d" + "outputId": "7220b133-024e-480c-aed7-0879efab0317" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"--2025-02-03 14:27:25-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 21441 (21K) [text/plain]\n", + "Saving to: ‘structured_qa.csv.2’\n", + "\n", + "structured_qa.csv.2 100%[===================>] 20.94K --.-KB/s in 0.001s \n", + "\n", + "2025-02-03 14:27:25 (14.8 MB/s) - ‘structured_qa.csv.2’ saved [21441/21441]\n", + "\n" + ] + } + ], "source": [ "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" ] @@ -101,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": { "id": "5bLJE4U7HEgx" }, @@ -114,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": { "id": "y3yUsRDWHEgy" }, @@ -123,6 +232,26 @@ "from loguru import logger" ] }, + { + "cell_type": "code", + "source": [ + "import PyPDF2\n", + "\n", + "\n", + "def load_pdf(pdf_file: str) -> str | None:\n", + " try:\n", + " pdf_reader = PyPDF2.PdfReader(pdf_file)\n", + " return \"\\n\".join(page.extract_text() for page in pdf_reader.pages)\n", + " except Exception as e:\n", + " logger.exception(e)\n", + " return None" + ], + "metadata": { + "id": "d9HBkl8rM5ED" + }, + "execution_count": 7, + "outputs": [] + }, { "cell_type": "markdown", "metadata": { @@ -134,7 +263,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": { "id": "n6d8F7cYHEgy" }, @@ -176,7 +305,7 @@ " {\n", " \"role\": \"system\",\n", " \"content\": ANSWER_WITH_TYPE_PROMPT.format(\n", - " CURRENT_INFO=Path(document_file).read_text()\n", + " CURRENT_INFO=load_pdf(document_file)\n", " ),\n", " 
},\n", " {\"role\": \"user\", \"content\": question},\n", @@ -203,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": { "id": "9zx8nCaZHEgz" }, @@ -214,29 +343,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "colab": { - "base_uri": "https://localhost:8080/", - "height": 156, - "referenced_widgets": [ - "79ed82485b234525976e17fc9ebe47de", - "c35c62230dfe4c32b2c45632e691cd46", - "cfeae74e2e3f44e9b2e2dbbb9c756bd8", - "082c72195b2b4a13b0ca7cecf368cebf", - "b09e7d3ff8cd498aa45faca3b390ac70", - "5d98e9f3ef544194b6a99d94c1b56cd2", - "83e32e91aff04efda8ca1efad07249cb", - "d320191f9d7146f8b8ccd10e7ad6dd6d", - "575b8c7136f846c9ab62e7a796350506", - "96da7d2d05834aa98fe41ad43e5c2c62", - "e2ae5284ec9349a4a920585c12419e33" - ] + "base_uri": "https://localhost:8080/" }, "id": "U4R84hHRHEgz", - "outputId": "3c083d53-79db-4f8c-ef1a-a12754cb227f" + "outputId": "d05aabac-d3b2-4c20-f810-dc31a40daac7" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + } + ], "source": [ "model = load_llama_cpp_model(\n", " \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n", @@ -254,15 +382,265 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-qtPf9RmHEgz", - "outputId": 
"723ac0e9-23c4-470f-df81-1ec4065df532" + "outputId": "41d29e69-c393-4bc6-95d9-c9d5f94606be" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[32m2025-02-03 14:27:35.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-02-03 14:27:35.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:27:35.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-02-03 14:27:35.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:27:35.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-02-03 14:28:23.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:28:23.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", + "\u001b[32m2025-02-03 14:29:10.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:29:10.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-02-03 14:29:56.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:29:56.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-02-03 14:30:44.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:30:44.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. 
-B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-02-03 14:31:31.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:31:31.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-02-03 14:32:18.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:32:18.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-02-03 14:33:05.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:33:05.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. 
-C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-02-03 14:33:51.860\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:33:51.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-02-03 14:34:38.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:34:38.339\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:25.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:25.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:25.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:25.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:25.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:42.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:42.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:43.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:43.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:44.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:44.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:46.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:46.700\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:47.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 
YES\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:47.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:49.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:49.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:51.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:51.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:53.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B: NVIDIA P100\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:53.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:54.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:54.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:55.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 4000\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:55.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:57.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:57.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:57.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:57.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:57.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense 
layers?\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:19.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:19.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:26.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: -C: 3x\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:26.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:35.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:35.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:41.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:41.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:42.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - 
\u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:42.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:42.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:44.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:44.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:46.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:46.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:48.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:48.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:50.583\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:50.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:52.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:52.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:54.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:54.262\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:56.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:56.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:59.104\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:59.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:59.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:59.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:59.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:11.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:11.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:11.728\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:11.730\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:12.148\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:12.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:12.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 14\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:12.704\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:13.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:13.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:13.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:13.654\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2302.13971\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:13.796\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:13.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:13.801\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:33.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:33.563\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:34.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:34.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:36.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:36.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:37.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 0.1\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:37.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:40.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 20\u001b[0m\n", + 
"\u001b[32m2025-02-03 14:39:40.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:42.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:42.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:43.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:43.550\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:44.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:44.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:46.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:46.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading 
document https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:47.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:47.967\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:47.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:22.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:22.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:25.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:25.498\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? 
-A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:28.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:28.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:31.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:31.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:35.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:35.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:35.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:35.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:35.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:46.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:46.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:48.553\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:48.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:50.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:50.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:52.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:52.777\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access 
policy?\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:54.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 2015\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:54.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:55.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:55.818\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:56.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:56.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:56.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? 
-A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:01.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:01.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:08.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:08.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:13.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:13.710\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:20.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:20.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:20.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:20.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:20.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:38.753\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:38.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:57.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:57.887\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:15.828\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:15.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:34.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:34.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:52.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:52.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:10.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:10.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:29.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:29.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:47.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:47.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:06.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:06.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:24.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:24.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. 
-C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:42.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:42.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:01.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:01.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:01.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:01.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:01.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:05.851\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:05.853\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:06.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:06.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:06.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:06.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:07.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:07.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:08.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:08.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:09.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:09.283\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:10.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:10.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:10.604\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:10.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:11.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:11.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:11.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:11.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:12.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:12.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:12.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:12.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:13.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:13.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:13.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:13.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:14.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 7\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:14.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:15.005\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:15.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:15.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:27.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 4\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:27.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:28.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:28.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:29.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:29.584\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: 
How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:30.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:30.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:31.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:31.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:32.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:32.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:33.189\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:33.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:34.635\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:34.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:35.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:35.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:36.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:36.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:37.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:37.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:38.660\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-03 
14:47:38.662\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:39.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n" + ] + } + ], "source": [ "from pathlib import Path\n", "from urllib.request import urlretrieve\n", @@ -294,36 +672,998 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "3eW9TIKjHEgz", - "outputId": "b82bcada-3c21-4d13-ee04-8b57eeb83c4c" + "outputId": "6ea51282-b725-45cb-a898-08fcfb974a98" }, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "17 17 https://arxiv.org/pdf/2106.09685.pdf \n", + "22 22 https://authorsalliance.org/wp-content/uploads... \n", + "24 24 https://authorsalliance.org/wp-content/uploads... \n", + "27 27 https://arxiv.org/pdf/2201.11903 \n", + "28 28 https://arxiv.org/pdf/2201.11903 \n", + "29 29 https://arxiv.org/pdf/2201.11903 \n", + "30 30 https://arxiv.org/pdf/2201.11903 \n", + "31 31 https://arxiv.org/pdf/2201.11903 \n", + "32 32 https://arxiv.org/pdf/2201.11903 \n", + "33 33 https://arxiv.org/pdf/2201.11903 \n", + "34 34 https://arxiv.org/pdf/2201.11903 \n", + "37 37 https://github.com/mozilla-ai/structured-qa/re... \n", + "39 39 https://github.com/mozilla-ai/structured-qa/re... \n", + "40 40 https://github.com/mozilla-ai/structured-qa/re... \n", + "41 41 https://github.com/mozilla-ai/structured-qa/re... \n", + "44 44 https://github.com/mozilla-ai/structured-qa/re... 
\n", + "62 62 https://github.com/mozilla-ai/structured-qa/re... \n", + "63 63 https://commission.europa.eu/document/download... \n", + "64 64 https://commission.europa.eu/document/download... \n", + "65 65 https://commission.europa.eu/document/download... \n", + "66 66 https://commission.europa.eu/document/download... \n", + "67 67 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "68 68 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "69 69 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "70 70 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "71 71 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "72 72 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "73 73 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "74 74 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "75 75 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "76 76 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "77 77 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "79 79 https://aiindex.stanford.edu/wp-content/upload... \n", + "80 80 https://aiindex.stanford.edu/wp-content/upload... \n", + "81 81 https://aiindex.stanford.edu/wp-content/upload... \n", + "82 82 https://aiindex.stanford.edu/wp-content/upload... \n", + "83 83 https://aiindex.stanford.edu/wp-content/upload... \n", + "84 84 https://aiindex.stanford.edu/wp-content/upload... \n", + "85 85 https://aiindex.stanford.edu/wp-content/upload... \n", + "86 86 https://aiindex.stanford.edu/wp-content/upload... \n", + "87 87 https://aiindex.stanford.edu/wp-content/upload... \n", + "88 88 https://aiindex.stanford.edu/wp-content/upload... \n", + "90 90 https://arxiv.org/pdf/2302.13971 \n", + "94 94 https://arxiv.org/pdf/2302.13971 \n", + "99 99 https://assets.publishing.service.gov.uk/media... \n", + "100 100 https://assets.publishing.service.gov.uk/media... 
\n", + "\n", + " section \\\n", + "10 5.4 Regularization \n", + "17 4 OUR METHOD \n", + "22 HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER? \n", + "24 OVERCOMING RESERVATIONS ABOUT OPEN ACCESS \n", + "27 3 Arithmetic Reasoning \n", + "28 3.1 Experimental Setup \n", + "29 3.1 Experimental Setup \n", + "30 5 Symbolic Reasoning \n", + "31 5 Symbolic Reasoning \n", + "32 5 Symbolic Reasoning \n", + "33 3.4 Robustness of Chain of Thought \n", + "34 3.2 Results \n", + "37 CARD AND TILE EFFECTS \n", + "39 CHAPTER OVERVIEW \n", + "40 CHAPTER OVERVIEW \n", + "41 CHAPTER OVERVIEW \n", + "44 CARD AND TILE EFFECTS \n", + "62 GAME END \n", + "63 2.1. Toilets \n", + "64 CARBON MONOXIDE DETECTION AND VENTING \n", + "65 4.1. Natural lighting \n", + "66 1.2.1. Internal partitions and doors \n", + "67 5.2. Thread Hierarchy \n", + "68 5.2. Thread Hierarchy \n", + "69 6.1.1. Compilation Workflow \n", + "70 6.1.1. Compilation Workflow \n", + "71 6.1.1. Compilation Workflow \n", + "72 6.1.1. Compilation Workflow \n", + "73 15.3. API Fundamentals \n", + "74 15.3. API Fundamentals \n", + "75 15.3. API Fundamentals \n", + "76 15.3. API Fundamentals \n", + "77 23.1. What is Lazy Loading? \n", + "78 23.1. What is Lazy Loading? \n", + "79 Risk Perception \n", + "80 Risk Perception \n", + "81 Training Cost \n", + "82 Training Cost \n", + "83 LLM Tokenization Introduces Unfairness \n", + "84 LLM Tokenization Introduces Unfairness \n", + "85 U.S. Regulation \n", + "86 U.S. Regulation \n", + "87 Europe \n", + "88 Europe \n", + "90 2.1 Pre-training Data \n", + "94 3 Main results \n", + "99 Limitations of generative AI and LLMs \n", + "100 Procurement in an emerging market \n", + "\n", + " question answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 \n", + "17 Does LoRA work with any neural network contain... YES \n", + "22 how many peer-reviewed open access journals ar... A \n", + "24 Are publication fees required for all open acc... 
NO \n", + "27 Is Arithmetic reasoning is a task that languag... NO \n", + "28 How many large language models were evaluated? 5 \n", + "29 How many benchmarks were used to evaluate arit... 5 \n", + "30 Is symbolic reasoning usually simple for human... YES \n", + "31 How many words have the example names that the... B \n", + "32 Which symbolic reasoning task is used as an ou... A \n", + "33 How many annotators provided independent chain... 3 \n", + "34 How many random samples were examined to under... 100 \n", + "37 How many different races are there? 6 \n", + "39 Can you take a Chapter card and a Landmark til... NO \n", + "40 How many goins does a player take when discard... 3 \n", + "41 After taking a landmark tile, do you reveal a ... NO \n", + "44 Can you use a symbol more than once per turn? NO \n", + "62 If player 1 has 30 Victory points and 4 worker... A \n", + "63 Which type of water must be supplied in a toil... B \n", + "64 In which type of parkings must a carbon monoxi... C \n", + "65 What percentage is the daylight factor require... A \n", + "66 What fire resistance must vertical partitions ... A \n", + "67 What is the maximum number of threads within a... 1024 \n", + "68 Can you identify a thread with a four-dimensio... NO \n", + "69 In the offline compilation process using nvcc,... B \n", + "70 What are the two ways the host code can be out... B \n", + "71 What is the primary purpose of just-in-time (J... C \n", + "72 What happens to the compiled binary code after... A \n", + "73 When are virtual addresses assigned to graph a... C \n", + "74 What do graph memory nodes represent in a CUDA... A \n", + "75 When does a graph allocation's lifetime end? -... B \n", + "76 How must operations accessing graph memory be ... C \n", + "77 What is the primary benefit of Lazy Loading? -... A \n", + "78 Can you enable lazy loading by setting the env... NO \n", + "79 which type of risk was identified as the leadi... 
B \n", + "80 In which geographical area were fairness risks... C \n", + "81 What is a major consequence of the rising trai... A \n", + "82 How the AI Index and Epoch AI estimated traini... C \n", + "83 What is a major source of inequality in AI rel... A \n", + "84 What are the three major inequalities resultin... B \n", + "85 How many AI-related regulations were enacted i... 25 \n", + "86 Which of the following was identified as a hig... B \n", + "87 Which country had the highest proportion of fe... B \n", + "88 Which countries reported the smallest proporti... C \n", + "90 How many languages did the Wikipedia data cover? 20 \n", + "94 Was the model compared against GPT-4? NO \n", + "99 Can LLMs be used as an alternative to visiting... NO \n", + "100 Which of the following is NOT mentioned as a r... C \n", + "\n", + " pred_answer pred_section \n", + "10 YES NaN \n", + "17 NO NaN \n", + "22 B NaN \n", + "24 I NEED MORE INFO NaN \n", + "27 OUT OF CONTEXT NaN \n", + "28 OUT OF CONTEXT NaN \n", + "29 OUT OF CONTEXT NaN \n", + "30 OUT OF CONTEXT NaN \n", + "31 OUT OF CONTEXT NaN \n", + "32 OUT OF CONTEXT NaN \n", + "33 OUT OF CONTEXT NaN \n", + "34 OUT OF CONTEXT NaN \n", + "37 5 NaN \n", + "39 I NEED MORE INFO NaN \n", + "40 I NEED MORE INFO NaN \n", + "41 YES NaN \n", + "44 YES NaN \n", + "62 C NaN \n", + "63 OUT OF CONTEXT NaN \n", + "64 OUT OF CONTEXT NaN \n", + "65 OUT OF CONTEXT NaN \n", + "66 OUT OF CONTEXT NaN \n", + "67 OUT OF CONTEXT NaN \n", + "68 OUT OF CONTEXT NaN \n", + "69 OUT OF CONTEXT NaN \n", + "70 OUT OF CONTEXT NaN \n", + "71 OUT OF CONTEXT NaN \n", + "72 OUT OF CONTEXT NaN \n", + "73 OUT OF CONTEXT NaN \n", + "74 OUT OF CONTEXT NaN \n", + "75 OUT OF CONTEXT NaN \n", + "76 OUT OF CONTEXT NaN \n", + "77 OUT OF CONTEXT NaN \n", + "78 OUT OF CONTEXT NaN \n", + "79 OUT OF CONTEXT NaN \n", + "80 OUT OF CONTEXT NaN \n", + "81 OUT OF CONTEXT NaN \n", + "82 OUT OF CONTEXT NaN \n", + "83 OUT OF CONTEXT NaN \n", + "84 OUT OF CONTEXT NaN \n", + "85 OUT OF 
CONTEXT NaN \n", + "86 OUT OF CONTEXT NaN \n", + "87 OUT OF CONTEXT NaN \n", + "88 OUT OF CONTEXT NaN \n", + "90 8 NaN \n", + "94 I NEED MORE INFO NaN \n", + "99 I NEED MORE INFO NaN \n", + "100 I NEED MORE INFO NaN " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.037625.4 RegularizationWhat was the dropout rate used for the base mo...0.1YESNaN
1717https://arxiv.org/pdf/2106.09685.pdf4 OUR METHODDoes LoRA work with any neural network contain...YESNONaN
2222https://authorsalliance.org/wp-content/uploads...HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER?how many peer-reviewed open access journals ar...ABNaN
2424https://authorsalliance.org/wp-content/uploads...OVERCOMING RESERVATIONS ABOUT OPEN ACCESSAre publication fees required for all open acc...NOI NEED MORE INFONaN
2727https://arxiv.org/pdf/2201.119033 Arithmetic ReasoningIs Arithmetic reasoning is a task that languag...NOOUT OF CONTEXTNaN
2828https://arxiv.org/pdf/2201.119033.1 Experimental SetupHow many large language models were evaluated?5OUT OF CONTEXTNaN
2929https://arxiv.org/pdf/2201.119033.1 Experimental SetupHow many benchmarks were used to evaluate arit...5OUT OF CONTEXTNaN
3030https://arxiv.org/pdf/2201.119035 Symbolic ReasoningIs symbolic reasoning usually simple for human...YESOUT OF CONTEXTNaN
3131https://arxiv.org/pdf/2201.119035 Symbolic ReasoningHow many words have the example names that the...BOUT OF CONTEXTNaN
3232https://arxiv.org/pdf/2201.119035 Symbolic ReasoningWhich symbolic reasoning task is used as an ou...AOUT OF CONTEXTNaN
3333https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...3OUT OF CONTEXTNaN
3434https://arxiv.org/pdf/2201.119033.2 ResultsHow many random samples were examined to under...100OUT OF CONTEXTNaN
3737https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?65NaN
3939https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWCan you take a Chapter card and a Landmark til...NOI NEED MORE INFONaN
4040https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWHow many goins does a player take when discard...3I NEED MORE INFONaN
4141https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
4444https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSCan you use a symbol more than once per turn?NOYESNaN
6262https://github.com/mozilla-ai/structured-qa/re...GAME ENDIf player 1 has 30 Victory points and 4 worker...ACNaN
6363https://commission.europa.eu/document/download...2.1. ToiletsWhich type of water must be supplied in a toil...BOUT OF CONTEXTNaN
6464https://commission.europa.eu/document/download...CARBON MONOXIDE DETECTION AND VENTINGIn which type of parkings must a carbon monoxi...COUT OF CONTEXTNaN
6565https://commission.europa.eu/document/download...4.1. Natural lightingWhat percentage is the daylight factor require...AOUT OF CONTEXTNaN
6666https://commission.europa.eu/document/download...1.2.1. Internal partitions and doorsWhat fire resistance must vertical partitions ...AOUT OF CONTEXTNaN
6767https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...5.2. Thread HierarchyWhat is the maximum number of threads within a...1024OUT OF CONTEXTNaN
6868https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...5.2. Thread HierarchyCan you identify a thread with a four-dimensio...NOOUT OF CONTEXTNaN
6969https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...6.1.1. Compilation WorkflowIn the offline compilation process using nvcc,...BOUT OF CONTEXTNaN
7070https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...6.1.1. Compilation WorkflowWhat are the two ways the host code can be out...BOUT OF CONTEXTNaN
7171https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...6.1.1. Compilation WorkflowWhat is the primary purpose of just-in-time (J...COUT OF CONTEXTNaN
7272https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...6.1.1. Compilation WorkflowWhat happens to the compiled binary code after...AOUT OF CONTEXTNaN
7373https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...15.3. API FundamentalsWhen are virtual addresses assigned to graph a...COUT OF CONTEXTNaN
7474https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...15.3. API FundamentalsWhat do graph memory nodes represent in a CUDA...AOUT OF CONTEXTNaN
7575https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...15.3. API FundamentalsWhen does a graph allocation's lifetime end? -...BOUT OF CONTEXTNaN
7676https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...15.3. API FundamentalsHow must operations accessing graph memory be ...COUT OF CONTEXTNaN
7777https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...23.1. What is Lazy Loading?What is the primary benefit of Lazy Loading? -...AOUT OF CONTEXTNaN
7878https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...23.1. What is Lazy Loading?Can you enable lazy loading by setting the env...NOOUT OF CONTEXTNaN
7979https://aiindex.stanford.edu/wp-content/upload...Risk Perceptionwhich type of risk was identified as the leadi...BOUT OF CONTEXTNaN
8080https://aiindex.stanford.edu/wp-content/upload...Risk PerceptionIn which geographical area were fairness risks...COUT OF CONTEXTNaN
8181https://aiindex.stanford.edu/wp-content/upload...Training CostWhat is a major consequence of the rising trai...AOUT OF CONTEXTNaN
8282https://aiindex.stanford.edu/wp-content/upload...Training CostHow the AI Index and Epoch AI estimated traini...COUT OF CONTEXTNaN
8383https://aiindex.stanford.edu/wp-content/upload...LLM Tokenization Introduces UnfairnessWhat is a major source of inequality in AI rel...AOUT OF CONTEXTNaN
8484https://aiindex.stanford.edu/wp-content/upload...LLM Tokenization Introduces UnfairnessWhat are the three major inequalities resultin...BOUT OF CONTEXTNaN
8585https://aiindex.stanford.edu/wp-content/upload...U.S. RegulationHow many AI-related regulations were enacted i...25OUT OF CONTEXTNaN
8686https://aiindex.stanford.edu/wp-content/upload...U.S. RegulationWhich of the following was identified as a hig...BOUT OF CONTEXTNaN
8787https://aiindex.stanford.edu/wp-content/upload...EuropeWhich country had the highest proportion of fe...BOUT OF CONTEXTNaN
8888https://aiindex.stanford.edu/wp-content/upload...EuropeWhich countries reported the smallest proporti...COUT OF CONTEXTNaN
9090https://arxiv.org/pdf/2302.139712.1 Pre-training DataHow many languages did the Wikipedia data cover?208NaN
9494https://arxiv.org/pdf/2302.139713 Main resultsWas the model compared against GPT-4?NOI NEED MORE INFONaN
9999https://assets.publishing.service.gov.uk/media...Limitations of generative AI and LLMsCan LLMs be used as an alternative to visiting...NOI NEED MORE INFONaN
100100https://assets.publishing.service.gov.uk/media...Procurement in an emerging marketWhich of the following is NOT mentioned as a r...CI NEED MORE INFONaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"results\",\n \"rows\": 48,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24,\n \"min\": 10,\n \"max\": 100,\n \"num_unique_values\": 48,\n \"samples\": [\n 72,\n 85,\n 71\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2302.13971\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 29,\n \"samples\": [\n \"Limitations of generative AI and LLMs\",\n \"5.2. Thread Hierarchy\",\n \"2.1. Toilets\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 48,\n \"samples\": [\n \"What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\",\n \"How many AI-related regulations were enacted in the United States in 2023?\",\n \"What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 13,\n \"samples\": [\n \"25\",\n \"C\",\n \"0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"NO\",\n \"5\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 14 + } + ], "source": [ "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " if result[\"pred_answer\"].startswith(\n", + " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", + " ):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AhenESELHEgz", - "outputId": "7e08929e-1342-4705-b1b4-e8cac81c35a6" + "outputId": "b9a1c2d0-3628-4722-b99d-a8e044bde8f4" }, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.5339805825242718" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ], "source": [ "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", "accuracy" ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "CD7lcTDjSM7T" + }, + "execution_count": null, + "outputs": [] } ], "metadata": { @@ -339,354 +1679,8 @@ "language_info": 
{ "name": "python", "version": "3.10.12" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "082c72195b2b4a13b0ca7cecf368cebf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_96da7d2d05834aa98fe41ad43e5c2c62", - "placeholder": "​", - "style": "IPY_MODEL_e2ae5284ec9349a4a920585c12419e33", - "value": " 8.10G/8.10G [05:55<00:00, 22.8MB/s]" - } - }, - "575b8c7136f846c9ab62e7a796350506": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "5d98e9f3ef544194b6a99d94c1b56cd2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": 
null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "79ed82485b234525976e17fc9ebe47de": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c35c62230dfe4c32b2c45632e691cd46", - "IPY_MODEL_cfeae74e2e3f44e9b2e2dbbb9c756bd8", - "IPY_MODEL_082c72195b2b4a13b0ca7cecf368cebf" - ], - "layout": "IPY_MODEL_b09e7d3ff8cd498aa45faca3b390ac70" - } - }, - "83e32e91aff04efda8ca1efad07249cb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "96da7d2d05834aa98fe41ad43e5c2c62": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b09e7d3ff8cd498aa45faca3b390ac70": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - 
"order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c35c62230dfe4c32b2c45632e691cd46": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5d98e9f3ef544194b6a99d94c1b56cd2", - "placeholder": "​", - "style": "IPY_MODEL_83e32e91aff04efda8ca1efad07249cb", - "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" - } - }, - "cfeae74e2e3f44e9b2e2dbbb9c756bd8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d320191f9d7146f8b8ccd10e7ad6dd6d", - "max": 8098525888, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_575b8c7136f846c9ab62e7a796350506", - "value": 8098525888 - } - }, - "d320191f9d7146f8b8ccd10e7ad6dd6d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e2ae5284ec9349a4a920585c12419e33": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } } }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file From 4ea56e243afa9aa4eb75d3e132ed10e15c03da2b Mon Sep 17 00:00:00 2001 From: daavoo Date: Mon, 3 Feb 2025 17:14:01 +0100 Subject: [PATCH 107/120] Update gemini_full_context --- benchmark/gemini_full_context.ipynb | 916 ++++++++++++++-------------- 1 file changed, 455 insertions(+), 461 deletions(-) diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index d3c8e6d..c25fa76 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -44,7 +44,7 @@ "base_uri": "https://localhost:8080/" }, "id": "QrgOGtuGlyhT", - "outputId": 
"9ef0b54f-0c53-46ae-b242-c38114be8e1d" + "outputId": "d521f860-eab5-41b3-df4c-cbda5c15c5a6" }, "outputs": [ { @@ -52,100 +52,70 @@ "output_type": "stream", "text": [ "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-evgza823\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-evgza823\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-nwtt45ou\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-nwtt45ou\n", " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", " Switched to a new branch '5-add-benchmark'\n", " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit ae325d3fd34f87be6ec8ca17d9b56a9a96c983fd\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit c5ee8e63ab951b740147be2d69c2f00549043734\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting fire (from structured-qa==0.3.3.dev71+gae325d3)\n", - " Downloading fire-0.7.0.tar.gz (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (0.27.1)\n", - "Collecting loguru (from structured-qa==0.3.3.dev71+gae325d3)\n", - " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (2.10.5)\n", - "Collecting pymupdf4llm (from structured-qa==0.3.3.dev71+gae325d3)\n", - " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev71+gae325d3) (6.0.2)\n", - "Collecting streamlit (from structured-qa==0.3.3.dev71+gae325d3)\n", - " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev71+gae325d3) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev71+gae325d3) (2.27.2)\n", - "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev71+gae325d3)\n", - " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.25.5)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev71+gae325d3) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.10.2)\n", - "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev71+gae325d3)\n", - " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.44)\n", - "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev71+gae325d3)\n", - " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev71+gae325d3) (6.3.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.23.0)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in 
/usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev71+gae325d3) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev71+gae325d3) (5.0.2)\n", - "Requirement already satisfied: 
MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (24.3.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.36.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev71+gae325d3) (1.17.0)\n", - "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", - "Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m40.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m57.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m53.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", - " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev71+gae325d3-py3-none-any.whl size=16241 sha256=641ac0cca0f14ec4115b7b865280c1a7e23973690749a2ffed3794756c4dbe0d\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-k7clz5in/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", - " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=af951b418ae7e04eab976db91832b61bade5b62b8d6eeb63df48c5254e6bb4ad\n", - " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", - "Successfully built structured-qa fire\n", - "Installing collected packages: watchdog, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", - "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 streamlit-1.41.1 structured-qa-0.3.3.dev71+gae325d3 watchdog-6.0.0\n" + "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.27.1)\n", + "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (2.10.6)\n", + "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.0.17)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (6.0.2)\n", + "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (3.12.1)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (1.41.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev105+gc5ee8e6) (2.5.0)\n", + "Requirement already satisfied: filelock in 
/usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev105+gc5ee8e6) (2.27.2)\n", + "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev105+gc5ee8e6) (1.25.2)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.10.2)\n", + "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (6.0.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.1.44)\n", + "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.9.1)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages 
(from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (6.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.24.1)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from 
requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (25.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from 
python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.17.0)\n" ] } ], @@ -161,23 +131,23 @@ "base_uri": "https://localhost:8080/" }, "id": "S22kTrfPlyhU", - "outputId": "ef9530ba-1b0f-4436-cde6-536094af655a" + "outputId": "676b4a64-71f6-416b-da99-6b393f324870" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "--2025-01-28 14:06:00-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2025-02-03 14:30:33-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 21734 (21K) [text/plain]\n", - "Saving to: ‘structured_qa.csv’\n", + "Length: 21441 (21K) [text/plain]\n", + "Saving to: ‘structured_qa.csv.2’\n", "\n", - "structured_qa.csv 100%[===================>] 21.22K --.-KB/s in 0.007s \n", + "\rstructured_qa.csv.2 0%[ ] 0 --.-KB/s \rstructured_qa.csv.2 100%[===================>] 20.94K --.-KB/s in 0s \n", "\n", - "2025-01-28 14:06:01 (3.14 MB/s) - ‘structured_qa.csv’ saved [21734/21734]\n", + "2025-02-03 14:30:33 (100 MB/s) - ‘structured_qa.csv.2’ saved [21441/21441]\n", "\n" ] } @@ -268,7 +238,10 @@ " time.sleep(60)\n", " question = row[\"question\"]\n", " logger.info(f\"Question: {question}\")\n", - " response = model.model.generate_content([file, question])\n", + " try:\n", + " response = model.model.generate_content([file, question])\n", + " except Exception:\n", + " response_json = json.dumps({\"answer\": \"Error\", \"section\": \"Error\"})\n", " logger.info(response.text)\n", " response_json = json.loads(response.text)\n", " answers[index] = response_json[\"answer\"]\n", @@ -370,557 +343,578 @@ "height": 1000 }, "id": "AZBwRnfjlyhZ", - "outputId": "e3713b0b-3791-441f-ba5c-d3897fbe2468" + "outputId": "f3ace26a-eb56-405a-a436-d802854f29bd" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2025-01-28 14:06:05.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:05.793\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:07.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded 
https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf to HAI_AI-Index-Report-2024.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:07.491\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:12.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:06:12.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:27.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. 
Responsible AI\",\n", + "\u001b[32m2025-02-03 14:30:36.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-02-03 14:30:36.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:30:36.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-02-03 14:30:36.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 14:30:38.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:30:38.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. 
-C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-02-03 14:31:38.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.1 Assessing Responsible AI\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:07:27.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:33.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.1 Assessing Responsible Al\",\n", + "\u001b[32m2025-02-03 14:31:38.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", + "\u001b[32m2025-02-03 14:32:37.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"3.1 Assessing Responsible AI\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:08:33.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. 
-C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:33.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"Chapter 1: Research and Development\",\n", + "\u001b[32m2025-02-03 14:32:37.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-02-03 14:33:43.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"1. Chapter 1: Research and Development\",\n", " \"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:09:33.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:37.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"1.3 Frontier Al Research\",\n", + "\u001b[32m2025-02-03 14:33:43.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-02-03 14:34:49.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"Appendix\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:10:37.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:47.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"2.2 Language\",\n", - " \"answer\": \"A\"\n", + "\u001b[32m2025-02-03 14:34:49.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-02-03 14:35:48.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"2.2 Language\",\n", + "\"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:11:47.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-01-28 14:12:50.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:35:48.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-02-03 14:36:49.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"1.3 Frontier Al Research\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:12:50.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:07.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:36:49.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-02-03 14:37:49.169\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"7.1 Overview of Al Policy in 2023\",\n", " \"answer\": 25\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:14:07.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:14.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"7.4 Al Regulation\",\n", - "\"answer\": \"B\"\n", + "\u001b[32m2025-02-03 14:37:49.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. 
-C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-02-03 14:38:54.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"7.4 Al Regulation\",\n", + " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:15:14.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:15.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:38:54.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. 
-C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-02-03 14:39:58.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", "\"section\": \"6.1 Postsecondary CS and Al Education\",\n", "\"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:16:15.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:21.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:39:58.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-02-03 14:40:56.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"8.1 Al Postsecondary Education\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:21.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:22.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:22.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:26.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:26.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:32.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:40:56.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-02-03 14:40:56.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:40:56.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 14:40:58.267\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:40:58.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:03.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"3 Model Architecture\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:32.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "ERROR:tornado.access:503 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 3442.83ms\n", - "\u001b[32m2025-01-28 14:17:39.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:41:03.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:07.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", " \"answer\": 6\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:39.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:44.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:41:07.829\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:12.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", " \"answer\": 6\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:44.012\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:49.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:41:12.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:16.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"3.2.2 Multi-Head Attention\",\n", " \"answer\": 8\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:49.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:53.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:41:16.747\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:20.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"3.4 Embeddings and Softmax\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:53.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:58.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:41:20.375\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:24.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"6.2 Model Variations\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:17:58.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "ERROR:tornado.access:503 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 3822.10ms\n", - "\u001b[32m2025-01-28 14:18:06.467\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:41:24.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:33.087\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5.2 Hardware and Schedule\",\n", - " \"answer\": 8\n", + " \"answer\": \"8\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:06.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:12.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:41:33.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:36.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5.2 Hardware and Schedule\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:12.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:16.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:41:36.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:43.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5.3 Optimizer\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:16.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:23.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:41:43.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-02-03 14:41:46.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5.3 Optimizer\",\n", - " \"answer\": 4000\n", + " \"answer\": \"4000\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:18:23.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:19:23.381\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-01-28 14:19:28.474\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:41:46.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:46.851\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:50.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5.4 Regularization\",\n", " \"answer\": \"0.1\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:19:28.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:19:29.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:19:29.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-28 14:19:33.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:19:33.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-01-28 14:19:41.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:42:50.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:50.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:50.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:52.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:42:52.609\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:00.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"4 OUR METHOD\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:19:41.947\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-01-28 14:19:53.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:43:00.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:07.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"ABSTRACT\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:19:53.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:03.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:43:07.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:16.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"1 INTRODUCTION\",\n", " \"answer\": \"175\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:03.323\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:11.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"4. Our method\",\n", - "\"answer\": \"No\"\n", + "\u001b[32m2025-02-03 14:43:16.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:23.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. Method\",\n", + " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:11.381\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:11.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:11.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:15.728\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:15.730\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:25.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:43:23.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:23.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:23.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:24.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:24.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-02-03 14:43:47.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", "\"section\": \"3 Arithmetic Reasoning\",\n", "\"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:25.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - 
\u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:35.232\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:43:47.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:01.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", "\"section\": \"3.1 Experimental Setup\",\n", "\"answer\": 5\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:35.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:43.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"3.1 Experimental Setup\",\n", + "\u001b[32m2025-02-03 14:44:01.461\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:09.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"3 Arithmetic Reasoning\",\n", "\"answer\": 5\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:43.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - 
\u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:51.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:44:09.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:17.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5 Symbolic Reasoning\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:20:51.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:07.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:44:17.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:42.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", "\"section\": \"5 Symbolic Reasoning\",\n", "\"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:07.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:17.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:44:42.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:50.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5 Symbolic Reasoning\",\n", " \"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:17.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:26.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:44:50.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-02-03 14:44:57.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", "\"section\": \"3.4 Robustness of Chain of Thought\",\n", "\"answer\": 3\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:26.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:40.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:44:57.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many random 
samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:07.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"3.2 Results\",\n", " \"answer\": \"100\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:40.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:41.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:41.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:45.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:45.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:49.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:45:07.861\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:07.887\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:07.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:09.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:09.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:12.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"2.4. 
Recurrent Networks\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:49.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:53.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:45:12.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:15.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"3. Experimental Results\",\n", " \"answer\": 3\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:53.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:57.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:45:15.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:19.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"3. 
Experimental Results\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:21:57.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:00.855\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"3. Experimental Results\",\n", - "\"answer\": 14\n", + "\u001b[32m2025-02-03 14:45:19.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:22.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"Table 1. 
Computation and memory analysis of toy problems.\",\n", + " \"answer\": \"14\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:00.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:05.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"Experimental Results\",\n", + "\u001b[32m2025-02-03 14:45:22.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:25.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. Experimental Results\",\n", " \"answer\": 3\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:05.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:09.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:45:25.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:28.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"4. Conclusion\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:09.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:11.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:11.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:14.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:14.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:32.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. How “Open” Do You Want to Make Your Work?\",\n", + "\u001b[32m2025-02-03 14:45:28.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2302.13971\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:28.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:28.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:29.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:29.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:37.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"2.1 Pre-training Data\",\n", + "\"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:37.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:48.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"2.1 Pre-training Data\",\n", + "\"answer\": 20\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:48.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:55.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"2.3 Optimizer\",\n", + " \"answer\": \"A\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:45:55.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:18.186\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"2.3 Optimizer\",\n", + "\"answer\": 0.1\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:18.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:26.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"3 Main results\",\n", + "\"answer\": 20\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:26.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:36.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"3 Main results\",\n", + "\"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:36.891\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:47.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"5. Bias, Toxicity and Misinformation\",\n", + " \"answer\": \"Yes\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:47.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:54.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"5. Bias, Toxicity and Misinformation\",\n", + "\"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:46:54.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:04.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"5.2 CrowS-Pairs\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:04.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:04.131\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:04.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:05.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:05.682\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:15.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"Limitations of generative Al and LLMs\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:15.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:42.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"Limitations of generative Al and LLMs\",\n", + " \"answer\": \"No\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:47:42.639\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? -A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", + "\u001b[32m2025-02-03 14:48:28.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"Procurement in an emerging market\",\n", + " \"answer\": \"C\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:48:28.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", + "\u001b[32m2025-02-03 14:48:41.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"Accountability and responsibility\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:48:41.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", + "\u001b[32m2025-02-03 14:48:54.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"Accountability and responsibility\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:32.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:53.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"5. Where Do You Want to Make Your Work Available?\",\n", + "\u001b[32m2025-02-03 14:48:54.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:48:55.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:48:55.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 14:48:57.477\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:48:57.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-02-03 14:49:39.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"1. Introduction\",\n", + " \"answer\": \"B\"\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 14:49:39.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? 
-A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-02-03 14:49:50.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"Chapter 5\",\n", "\"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:22:53.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-01-28 14:23:07.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:49:50.955\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-02-03 14:50:01.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"2. 
Benefits of Open Access\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:23:07.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-01-28 14:23:29.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:50:01.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-02-03 14:50:26.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5. Where Do You Want to Make Your Work Available?\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:23:29.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-01-28 14:23:49.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. 
Open Access Policies\",\n", + "\u001b[32m2025-02-03 14:50:26.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-02-03 14:50:38.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"3. Open Access Policies\",\n", " \"answer\": \"2015\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:23:49.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:01.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"Chapter 5\",\n", - "\"answer\": \"No\"\n", + "\u001b[32m2025-02-03 14:50:38.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-02-03 14:50:53.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"Chapter 5\",\n", + " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:01.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document 
https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:03.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:03.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:07.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:07.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:25.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"2. 
SANITARY HOT WATER INSTALLATIONS\",\n", - "\"answer\": \"B\"\n", + "\u001b[32m2025-02-03 14:50:53.028\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:50:53.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:50:53.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 14:50:55.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:50:55.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-02-03 14:51:21.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"2. 
SANITARY HOT WATER INSTALLATIONS\",\n", + " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:25.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:41.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:51:21.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-02-03 14:51:34.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"1.2.8. GAS DETECTION AND VENTING\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:41.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:54.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. 
VISUAL COMFORT\",\n", - " \"answer\": \"A\"\n", + "\u001b[32m2025-02-03 14:51:34.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-02-03 14:51:47.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"4. VISUAL COMFORT\",\n", + "\"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:24:54.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-01-28 14:25:07.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"I.1.3. OCCUPATIONAL SAFETY\",\n", + "\u001b[32m2025-02-03 14:51:47.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-02-03 14:52:01.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"1.1.3. 
OCCUPATIONAL SAFETY\",\n", "\"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:25:07.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:25:08.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:25:08.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-28 14:25:11.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:25:11.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-01-28 14:26:08.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:52:01.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 14:52:01.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", + 
"\u001b[32m2025-02-03 14:52:01.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 14:52:03.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 14:52:03.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-02-03 14:53:05.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5.2. Thread Hierarchy\",\n", " \"answer\": 1024\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:26:08.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-01-28 14:27:06.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:53:05.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-02-03 14:53:55.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", "\"section\": \"5.2. 
Thread Hierarchy\",\n", "\"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:27:06.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-01-28 14:28:04.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"6.1.1. Compilation Workflow\",\n", - "\"answer\": \"B\"\n", + "\u001b[32m2025-02-03 14:53:55.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-02-03 14:54:48.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"6.1.1.1 Offline Compilation\",\n", + " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:28:04.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. 
-C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-01-28 14:29:14.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:54:48.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-02-03 14:55:38.665\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", "\"section\": \"6.1.1.1 Offline Compilation\",\n", - "\"answer\": \"B\"\n", + " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:29:14.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-01-28 14:30:17.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:55:38.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? 
-A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-02-03 14:56:37.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"6.1.1.2 Just-in-Time Compilation\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:30:17.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-01-28 14:31:26.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:56:37.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. 
-C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-02-03 14:57:25.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"6.1.1.2 Just-in-Time Compilation\",\n", " \"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:31:26.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-01-28 14:32:27.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"15.3. API Fundamentals\",\n", - "\"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:32:27.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memmory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-01-28 14:33:35.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"15 Graph Memory Nodes\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:33:35.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. 
-B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-01-28 14:34:37.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"15.3. API Fundamentals\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:34:37.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-01-28 14:35:32.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 14:57:25.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-02-03 14:58:17.570\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"15.3. 
API Fundamentals\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:35:32.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:36:32.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initalization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-01-28 14:37:40.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"23.1. What is Lazy Loading?\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:37:40.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-01-28 14:38:37.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"23.1. 
What is Lazy Loading?\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:38:37.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\u001b[0m\n", - "\u001b[32m2025-01-28 14:38:39.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689 to ?uri=OJ:L_202401689.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:38:39.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-28 14:38:44.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:38:44.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: what is a requirement for datasets used in high-risk AI systems? 
-A: Exclusively open-source datasets -B: Datasets ensuring quality and diversity -C: Datasets not exceeding 1 GB in size\u001b[0m\n", - "\u001b[32m2025-01-28 14:39:10.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 10\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:39:10.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\u001b[0m\n", - "\u001b[32m2025-01-28 14:39:30.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 51\",\n", - "\"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:39:30.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What should providers of AI systems that generate synthetic content ensure? -A: That the content is not marked in any way. -B: That the outputs are marked in a machine-readable format and detectable as artificially generated or manipulated. 
-C: That there is no way to detect that the content is synthetic.\u001b[0m\n", - "\u001b[32m2025-01-28 14:39:52.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"(133)\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:39:52.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How long does a market surveillance authority have to take appropriate measures after receiving notification of a serious incident? -A: 3 days -B: 7 days -C: 14 days\u001b[0m\n", - "\u001b[32m2025-01-28 14:40:13.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 73\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:40:13.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum duration of testing in real-world conditions? -A: 3 months -B: 6 months, with a possible extension of an additional 6 months. -C: 12 months\u001b[0m\n", - "\u001b[32m2025-01-28 14:40:32.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 60\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:40:32.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum fine for supplying incorrect, incomplete, or misleading information to notified bodies or national competent authorities? 
-A: 7,500,000 EUR or 1% of annual turnover, whichever is higher. -B: 5,000,000 EUR or 0.5 % of annual turnover, whichever is higher -C: 10,000,000 EUR or 5% of annual turnover, whichever is higher\u001b[0m\n", - "\u001b[32m2025-01-28 14:40:52.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 99\",\n", + "\u001b[32m2025-02-03 14:58:17.572\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-02-03 14:59:16.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"15 Graph Memory Nodes\",\n", "\"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:40:52.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: By what date should codes of practice be ready? -A: 2 May 2025 -B: 2 May 2024 -C: 2 August 2025\u001b[0m\n", - "\u001b[32m2025-01-28 14:41:10.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 56\",\n", - "\"answer\": \"A\"\n", + "\u001b[32m2025-02-03 14:59:16.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. 
-B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-02-03 15:00:16.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"15.3. API Fundamentals\",\n", + "\"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:41:10.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the time period for a market surveillance authority to inform the Commission of a finding related to a non-compliant AI system? -A: 1 month -B: 2 months -C: Immediately\u001b[0m\n", - "\u001b[32m2025-01-28 14:41:40.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"Article 79\",\n", - " \"answer\": \"C\"\n", + "\u001b[32m2025-02-03 15:00:16.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-02-03 15:01:09.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"15.3. 
API Fundamentals\",\n", + "\"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:41:40.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How long after a high-risk AI system has been placed on the market or put into service must the authorized representative keep the technical documentation, EU declaration of conformity and certificates available for competent authorities? -A: 5 years -B: 10 years C: 15 years\u001b[0m\n", - "\u001b[32m2025-01-28 14:41:57.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 18\",\n", - "\"answer\": \"B\"\n", + "\u001b[32m2025-02-03 15:01:09.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 15:02:09.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-02-03 15:03:02.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"23.1. 
What is Lazy Loading?\",\n", + " \"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:41:57.267\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How long is the term of office for a Member State representative on the European Artificial Intelligence Board? -A: 2 years, renewable once -B: 3 years, renewable once -C: 4 years, renewable once\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:17.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"Article 65\",\n", - "\"answer\": \"B\"\n", + "\u001b[32m2025-02-03 15:03:02.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-02-03 15:04:03.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"23.1. 
What is Lazy Loading?\",\n", + " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:17.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:19.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:19.684\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:23.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:23.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:32.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:04:03.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 15:04:04.178\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded 
https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 15:04:04.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 15:04:05.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 15:04:05.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-02-03 15:04:15.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"OVERVIEW AND GOAL\",\n", " \"answer\": 3\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:32.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:41.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:04:15.710\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-02-03 15:04:24.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"END OF THE GAME\",\n", " \"answer\": 3\n", "}\u001b[0m\n", - 
"\u001b[32m2025-01-28 14:42:41.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:50.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:04:24.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-02-03 15:04:37.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5. CARD AND TILE EFFECTS\",\n", " \"answer\": 6\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:50.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:58.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:04:37.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-02-03 15:04:46.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"Turn overview\",\n", " \"answer\": \"A\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:42:58.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:08.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:04:46.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-02-03 15:04:55.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"Turn overview\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:08.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:16.224\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"CHAPTER OVERVIEW\",\n", - " \"answer\": \"3\"\n", + "\u001b[32m2025-02-03 15:04:55.503\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-02-03 15:05:03.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"A. Take a Chapter card\",\n", + " \"answer\": 3\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:16.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:25.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"B. Take a Landmark tile\",\n", + "\u001b[32m2025-02-03 15:05:03.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-02-03 15:05:11.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"3. 
CHAPTER OVERVIEW\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:25.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:34.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"4. CARD AND TILE COSTS\",\n", - "\"answer\": \"Yes\"\n", + "\u001b[32m2025-02-03 15:05:11.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-02-03 15:05:21.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"4. 
Card and Tile Costs\",\n", + " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:34.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:43.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:05:21.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-02-03 15:05:28.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"4. CARD AND TILE COSTS\",\n", " \"answer\": 2\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:43.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:52.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:05:28.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-02-03 15:05:37.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5. 
CARD AND TILE EFFECTS\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:43:52.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:44:52.563\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:01.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:05:37.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 15:06:37.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-02-03 15:06:45.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"CARD AND TILE EFFECTS\",\n", " \"answer\": \"B\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:01.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:10.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:06:45.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-02-03 15:06:58.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"5. CARD AND TILE EFFECTS\",\n", " \"answer\": \"C\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:10.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:18.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - "\"section\": \"CONQUERING MIDDLE-EARTH\",\n", - "\"answer\": \"No\"\n", + "\u001b[32m2025-02-03 15:06:58.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:07.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"CONQUERING MIDDLE-EARTH\",\n", + " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:18.981\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "ERROR:tornado.access:503 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 8332.30ms\n", - "\u001b[32m2025-01-28 14:45:36.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:07:07.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:15.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"END OF THE GAME\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:36.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:46.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:07:15.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:23.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"END OF THE GAME\",\n", - " \"answer\": \"7\"\n", - 
"}\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:46.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:50.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:50.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:53.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:53.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:59.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + " \"answer\": 7\n", + "}\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:23.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:23.550\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded 
https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:23.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:25.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:25.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:31.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"LOOKOUT PHASE\",\n", " \"answer\": 4\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:45:59.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:06.618\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:07:31.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:36.696\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"6. GAME FLOW\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:06.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:13.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"7. GAME FLOW\",\n", - " \"answer\": \"No\"\n", + "\u001b[32m2025-02-03 15:07:36.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:42.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + "\"section\": \"ACTION PHASE\",\n", + "\"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:13.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:19.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:07:42.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard 
must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:48.797\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"GAME FLOW\",\n", " \"answer\": 25\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:19.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:25.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:07:48.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-02-03 15:07:54.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"EXPEDITION PHASE\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:25.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:30.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:07:54.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Do you need 
a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-02-03 15:08:01.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"7. EXPEDITION PHASE\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:30.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:37.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"EXPEDITION PHASE\",\n", - " \"answer\": 1\n", + "\u001b[32m2025-02-03 15:08:01.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-02-03 15:08:07.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"7. 
EXPEDITION PHASE\",\n", + " \"answer\": \"1\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:37.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:43.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:08:07.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-02-03 15:08:12.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"CLEANUP PHASE\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:43.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:49.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"9. 
ACTIONS\",\n", + "\u001b[32m2025-02-03 15:08:12.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-02-03 15:08:19.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"LOCATION ABILITIES\",\n", " \"answer\": \"1\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:49.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allos to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:56.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", - " \"section\": \"ACTIONS\",\n", + "\u001b[32m2025-02-03 15:08:19.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-02-03 15:08:24.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", + " \"section\": \"9. 
BUILD A LOCATION\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:46:56.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-01-28 14:47:56.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-01-28 14:48:02.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:08:24.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-03 15:09:24.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-02-03 15:09:30.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"ACTIONS\",\n", " \"answer\": \"No\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:48:02.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-01-28 14:48:07.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:09:30.693\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-03 15:09:36.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"GAME END\",\n", " \"answer\": \"Yes\"\n", "}\u001b[0m\n", - "\u001b[32m2025-01-28 14:48:07.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-01-28 14:48:13.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m{\n", + "\u001b[32m2025-02-03 15:09:36.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-02-03 15:09:42.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", " \"section\": \"GAME END\",\n", " \"answer\": \"A\"\n", "}\u001b[0m\n" @@ -977,18 +971,18 @@ "height": 175 }, "id": "EYYJgWf6lyha", - "outputId": "70ed5703-a6fb-42c9-a2c5-79a5639001db" + "outputId": "a6fcb444-24c2-4dd0-84f4-cbad3011e567" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 34,\n \"min\": 5,\n \"max\": 88,\n \"num_unique_values\": 4,\n \"samples\": [\n 49,\n 88,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"CHAPTER OVERVIEW\",\n \"23.1. 
What is Lazy Loading?\",\n \"3.5 Positional Encoding\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Can you take a Chapter card and a Landmark tile on your same turn?\",\n \"Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\",\n \"Does the final model use learned positional embeddings?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"NO\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Turn overview\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "summary": "{\n \"name\": \"results\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29,\n \"min\": 5,\n \"max\": 78,\n \"num_unique_values\": 4,\n \"samples\": [\n 39,\n 78,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"CHAPTER 
OVERVIEW\",\n \"23.1. What is Lazy Loading?\",\n \"3.5 Positional Encoding\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Can you take a Chapter card and a Landmark tile on your same turn?\",\n \"Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\",\n \"Does the final model use learned positional embeddings?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"NO\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Turn overview\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe" }, "text/html": [ "\n", - "
\n", + "
\n", "
\n", "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
00https://arxiv.org/pdf/1706.037623 Model ArchitectureWhat type of architecture does the model use? ...CGENERATION ERRORNaN
11https://arxiv.org/pdf/1706.037623.1 Encoder and Decoder StacksHow many layers compose the encoder?6GENERATION ERRORNaN
22https://arxiv.org/pdf/1706.037623.1 Encoder and Decoder StacksHow many layers compose the decoder?6GENERATION ERRORNaN
33https://arxiv.org/pdf/1706.037623.2.2 Multi-Head AttentionHow many parallel attention heads are used?8GENERATION ERRORNaN
44https://arxiv.org/pdf/1706.037623.4 Embeddings and SoftmaxDoes the final model use learned embeddings fo...YESGENERATION ERRORNaN
........................
9494https://aiindex.stanford.edu/wp-content/upload...LLM Tokenization Introduces UnfairnessWhat are the three major inequalities resultin...BGENERATION ERRORNaN
9595https://aiindex.stanford.edu/wp-content/upload...U.S. RegulationHow many AI-related regulations were enacted i...25GENERATION ERRORNaN
9696https://aiindex.stanford.edu/wp-content/upload...U.S. RegulationWhich of the following was identified as a hig...BGENERATION ERRORNaN
9797https://aiindex.stanford.edu/wp-content/upload...EuropeWhich country had the highest proportion of fe...BGENERATION ERRORNaN
9898https://aiindex.stanford.edu/wp-content/upload...EuropeWhich countries reported the smallest proporti...CGENERATION ERRORNaN
\n", - "

99 rows × 7 columns

\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"results\",\n \"rows\": 99,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28,\n \"min\": 0,\n \"max\": 98,\n \"num_unique_values\": 99,\n \"samples\": [\n 62,\n 40,\n 95\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 59,\n \"samples\": [\n \"3 Model Architecture\",\n \"5.2 Hardware and Schedule\",\n \"CARD AND TILE EFFECTS\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 98,\n \"samples\": [\n \"Can you raid the locations of a player that has passed during the action phase?\",\n \"Is symbolic reasoning usually simple for humans but challenging for language models?\",\n \"How many AI-related regulations were enacted in the United States in 2023?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 21,\n \"samples\": [\n \"C\",\n \"4\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"GENERATION ERROR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 
null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 9 - } - ], + "outputs": [], "source": [ "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " results.loc[index, \"pred_answer\"] = result[\"pred_answer\"].strip()\n", + " if result[\"pred_answer\"].startswith(\n", + " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", + " ):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { - "id": "AhenESELHEgz", - "outputId": "4b5d7785-4d17-4c78-b0f8-d69fa50bad15", - "colab": { - "base_uri": "https://localhost:8080/" - } + "id": "AhenESELHEgz" }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0.0" - ] - }, - "metadata": {}, - "execution_count": 10 - } - ], + "outputs": [], "source": [ "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", "accuracy" ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "-acmSBPMvo1w" + }, + "execution_count": null, + "outputs": [] } ], "metadata": { From 8af98dfa90bc8faf742d3c3e0394242b60a21933 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 4 Feb 2025 12:40:56 +0100 Subject: [PATCH 110/120] Update with type --- benchmark/structured_qa.csv | 208 ++++++++++++++++++------------------ 1 file changed, 104 insertions(+), 104 deletions(-) diff --git a/benchmark/structured_qa.csv b/benchmark/structured_qa.csv index 6739d1b..dd8b507 100644 --- a/benchmark/structured_qa.csv +++ b/benchmark/structured_qa.csv @@ -1,104 +1,104 @@ -document,section,question,answer -https://arxiv.org/pdf/1706.03762,3 Model Architecture,"What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder",C -https://arxiv.org/pdf/1706.03762,3.1 Encoder and Decoder Stacks,"How many layers compose the encoder?",6 -https://arxiv.org/pdf/1706.03762,3.1 Encoder and Decoder Stacks,"How many layers compose the decoder?",6 -https://arxiv.org/pdf/1706.03762,3.2.2 Multi-Head Attention,"How many parallel attention heads are used?",8 -https://arxiv.org/pdf/1706.03762,3.4 Embeddings and Softmax,"Does the final model use learned embeddings for the input and output tokens?",YES -https://arxiv.org/pdf/1706.03762,3.5 Positional Encoding,"Does the final model use learned positional embeddings?",NO -https://arxiv.org/pdf/1706.03762,5.2 Hardware and Schedule,"How many GPUs were used for training?",8 -https://arxiv.org/pdf/1706.03762,5.2 Hardware and Schedule,"What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4",B -https://arxiv.org/pdf/1706.03762,5.3 Optimizer,"What optimizer was used for training? -A: AdamW -B: Adam -C: SGD",B -https://arxiv.org/pdf/1706.03762,5.3 Optimizer,"How many warmup steps were used?",4000 -https://arxiv.org/pdf/1706.03762,5.4 Regularization,"What was the dropout rate used for the base model?",0.1 -https://arxiv.org/pdf/2210.05189,2.4 Recurrent Networks,"Can recurrent networks also be converted to decision trees?",YES -https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many layers are in the toy model (y = x^2)?",3 -https://arxiv.org/pdf/2210.05189,3 Experimental Results,"Does the toy model (y = x^2) use Sigmoid activation function?",NO -https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many parameters are in the toy model (y = x^2) tree?",14 -https://arxiv.org/pdf/2210.05189,3 Experimental Results,"How many layers are in the half-moon neural network?",3 -https://arxiv.org/pdf/2210.05189,3 Experimental Results,"What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy",B -https://arxiv.org/pdf/2106.09685.pdf,4 OUR METHOD,Does LoRA work with any neural network containing dense layers?,YES -https://arxiv.org/pdf/2106.09685.pdf,LORA ABSTRACT,"By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x",C -https://arxiv.org/pdf/2106.09685.pdf,1 INTRODUCTION,"In billions, how many trainable parameters does GPT-3 have?",175 -https://arxiv.org/pdf/2106.09685.pdf,1 INTRODUCTION,Does LoRA introduce additional inference latency compared to full fine-tuning?,NO -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,SECTION I. INTRODUCTION,"According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license",B -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER?,"how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000",A -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,SECTION I. 
INTRODUCTION,Does open access eliminate price barriers?,YES -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,OVERCOMING RESERVATIONS ABOUT OPEN ACCESS,Are publication fees required for all open access journals?,NO -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,WHY ARE OPEN ACCESS POLICIES ADOPTED?,In what year did the Bill and Melinda Gates foundation implement an open access policy?,2015 -https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK AVAILABLE?,Are Gold Open Access and Green Open Access mutually exclusive.,NO -https://arxiv.org/pdf/2201.11903,3 Arithmetic Reasoning,Is Arithmetic reasoning is a task that language models often find very easy?,NO -https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many large language models were evaluated?,5 -https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many benchmarks were used to evaluate arithmetic reasoning?,5 -https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,Is symbolic reasoning usually simple for humans but challenging for language models?,YES -https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4,B -https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles",A -https://arxiv.org/pdf/2201.11903,3.4 Robustness of Chain of Thought,How many annotators provided independent chains of thought?,3 -https://arxiv.org/pdf/2201.11903,3.2 Results,How many random samples were examined to understand model performance?,100 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,OVERVIEW AND GOAL,How many chapters does the game last?,3 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,OVERVIEW AND GOAL,How many victory conditions are there?,3 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,How many different races are there?,6 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,Which player begins the game? -A: Sauron -B: The Fellowship -C: Other,A -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,Can you take a Chapter card and a Landmark tile on your same turn?,NO -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,How many goins does a player take when discarding a card during Chapter 3?,3 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CHAPTER OVERVIEW,"After taking a landmark tile, do you reveal a new tile and the end of your turn?",NO -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE COSTS,Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?,YES -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE COSTS,"If a player is missing 2 skill symbols, how many coins must they pay to the reserve?",2 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,Can 
you use a symbol more than once per turn?,NO -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue,B -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CARD AND TILE EFFECTS,During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3,C -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,CONQUERING MIDDLE-EARTH,"If you place or move an unit and an enemy fortress is present, does it trigger a conflict?",NO -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,END OF THE GAME,Can the game end in a tie?,YES -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,END OF THE GAME,In how many regions do you need to be present to win the game?,7 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOOKOUT PHASE,What is the maximum number of cards a player may acquire during the lookout phase?,4 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOOKOUT PHASE,Is there a limit to the number of cards a player may have in their hand?,NO -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,ACTION PHASE,"Can you raid the locations of a player that has passed during the action phase?",NO -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,ACTION PHASE,How many points in the scoreboard must be reached during the Action phase to trigger the final round?,25 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,EXPEDITION PHASE,Can players conquer and pillage the same island during the expedition phase?,NO 
-https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,EXPEDITION PHASE,Do you need a fish to conquer a distant island?,YES -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,EXPEDITION PHASE,How many victory points you get from each conquered island?,1 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,CLEANUP PHASE,Is there a cleanup phase in the final round?,NO -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOCATION ABILITIES,How many victory points are granted by a built Field Location card that work as an upgrade?,1 -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,LOCATION ABILITIES,Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?,YES -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,RAID,Can you use the raid action without a Raze token?,NO -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,GAME END,Can the game end in a tie?,YES -https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,GAME END,"If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie",A -https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,2.1. Toilets,"Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold",B -https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,CARBON MONOXIDE DETECTION AND VENTING,"In which type of parkings must a carbon monoxide detector be installed? 
-A: indoor -B: underground -C: indoor or underground",C -https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,4.1. Natural lighting,"What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%",A -https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,1.2.1. Internal partitions and doors,"What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90",A -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,5.2. Thread Hierarchy,"What is the maximum number of threads within a thread block?",1024 -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,5.2. Thread Hierarchy,"Can you identify a thread with a four-dimensional index?",NO -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,6.1.1. Compilation Workflow,"In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.",B -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,6.1.1. Compilation Workflow,"What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.",B -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,6.1.1. Compilation Workflow,"What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.",C -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,6.1.1. Compilation Workflow,"What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.",A -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.",C -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.",A -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.",B -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,15.3. API Fundamentals,"How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.",C -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,23.1. What is Lazy Loading?,"What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. 
-C: It makes CUDA programs easier to debug and optimize.",A -https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,23.1. What is Lazy Loading?,"Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?",NO -https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Risk Perception,"which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.",B -https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Risk Perception,"In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.",C -https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Training Cost,"What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.",A -https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Training Cost,"How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.",C -https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,LLM Tokenization Introduces Unfairness,"What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.",A -https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,LLM Tokenization Introduces Unfairness,"What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.",B -https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,U.S. Regulation,"How many AI-related regulations were enacted in the United States in 2023?",25 -https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,U.S. Regulation,"Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance",B -https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Europe,"Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom",B -https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,Europe,"Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.",C -https://arxiv.org/pdf/2302.13971,"2.1 Pre-training Data","What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%",A -https://arxiv.org/pdf/2302.13971,"2.1 Pre-training Data","How many languages did the Wikipedia data cover?",20 -https://arxiv.org/pdf/2302.13971,"2.3 Optimizer","What optimizer was used for training? -A: AdamW -B: Adam -C: SGD",A -https://arxiv.org/pdf/2302.13971,"2.3 Optimizer","What value was used for the weight decay?",0.1 -https://arxiv.org/pdf/2302.13971,"3 Main results","How many benchmarks were tested?",20 -https://arxiv.org/pdf/2302.13971,"3 Main results","Was the model compared against GPT-4?",NO -https://arxiv.org/pdf/2302.13971,"5 Bias, Toxicity and Misinformation","Can LLMs re-produce biases that exist in training data?",YES -https://arxiv.org/pdf/2302.13971,"5 Bias, Toxicity and Misinformation","Do authors consider the evaluations enough to fully comprehend the risks of the model?",NO -https://arxiv.org/pdf/2302.13971,"5.2 CrowS-Pairs","Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?",NO -https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Limitations of generative AI and LLMs","Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization",C -https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Limitations of generative AI and LLMs","Can LLMs be used as an alternative to visiting a doctor?",NO -https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Procurement in an emerging market","Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? 
-A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?",C -https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Accountability and responsibility","what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.",B -https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Accountability and responsibility","What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. -C: Rely on the provider's terms of use for all compliance and accuracy checks.",B +document,type,section,question,answer +https://arxiv.org/pdf/1706.03762,Scientific Paper,3 Model Architecture,"What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder",C +https://arxiv.org/pdf/1706.03762,Scientific Paper,3.1 Encoder and Decoder Stacks,"How many layers compose the encoder?",6 +https://arxiv.org/pdf/1706.03762,Scientific Paper,3.1 Encoder and Decoder Stacks,"How many layers compose the decoder?",6 +https://arxiv.org/pdf/1706.03762,Scientific Paper,3.2.2 Multi-Head Attention,"How many parallel attention heads are used?",8 +https://arxiv.org/pdf/1706.03762,Scientific Paper,3.4 Embeddings and Softmax,"Does the final model use learned embeddings for the input and output tokens?",YES +https://arxiv.org/pdf/1706.03762,Scientific Paper,3.5 Positional Encoding,"Does the final model use learned positional embeddings?",NO +https://arxiv.org/pdf/1706.03762,Scientific Paper,5.2 Hardware and Schedule,"How many GPUs were used for training?",8 +https://arxiv.org/pdf/1706.03762,Scientific Paper,5.2 Hardware and Schedule,"What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4",B +https://arxiv.org/pdf/1706.03762,Scientific Paper,5.3 Optimizer,"What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD",B +https://arxiv.org/pdf/1706.03762,Scientific Paper,5.3 Optimizer,"How many warmup steps were used?",4000 +https://arxiv.org/pdf/1706.03762,Scientific Paper,5.4 Regularization,"What was the dropout rate used for the base model?",0.1 +https://arxiv.org/pdf/2210.05189,Scientific Paper,2.4 Recurrent Networks,"Can recurrent networks also be converted to decision trees?",YES +https://arxiv.org/pdf/2210.05189,Scientific Paper,3 Experimental Results,"How many layers are in the toy model (y = x^2)?",3 +https://arxiv.org/pdf/2210.05189,Scientific Paper,3 Experimental Results,"Does the toy model (y = x^2) use Sigmoid activation function?",NO +https://arxiv.org/pdf/2210.05189,Scientific Paper,3 Experimental Results,"How many parameters are in the toy model (y = x^2) tree?",14 +https://arxiv.org/pdf/2210.05189,Scientific Paper,3 Experimental Results,"How many layers are in the half-moon neural network?",3 +https://arxiv.org/pdf/2210.05189,Scientific Paper,3 Experimental Results,"What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy",B +https://arxiv.org/pdf/2106.09685.pdf,"Scientific Report",4 OUR METHOD,Does LoRA work with any neural network containing dense layers?,YES +https://arxiv.org/pdf/2106.09685.pdf,"Scientific Report",LORA ABSTRACT,"By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x",C +https://arxiv.org/pdf/2106.09685.pdf,"Scientific Report",1 INTRODUCTION,"In billions, how many trainable parameters does GPT-3 have?",175 +https://arxiv.org/pdf/2106.09685.pdf,"Scientific Report",1 INTRODUCTION,Does LoRA introduce additional inference latency compared to full fine-tuning?,NO +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,"Techincal Documentation",SECTION I. 
INTRODUCTION,"According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license",B +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,"Techincal Documentation",HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER?,"how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000",A +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,"Techincal Documentation",SECTION I. INTRODUCTION,Does open access eliminate price barriers?,YES +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,"Techincal Documentation",OVERCOMING RESERVATIONS ABOUT OPEN ACCESS,Are publication fees required for all open access journals?,NO +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,"Techincal Documentation",WHY ARE OPEN ACCESS POLICIES ADOPTED?,In what year did the Bill and Melinda Gates foundation implement an open access policy?,2015 +https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf,"Techincal Documentation",CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK AVAILABLE?,Are Gold Open Access and Green Open Access mutually exclusive.,NO +https://arxiv.org/pdf/2201.11903,"Scientific Report",3 Arithmetic Reasoning,Is Arithmetic reasoning is a task that language models often find very easy?,NO +https://arxiv.org/pdf/2201.11903,"Scientific Report",3.1 Experimental Setup,How many large language models were evaluated?,5 +https://arxiv.org/pdf/2201.11903,"Scientific Report",3.1 Experimental Setup,How many benchmarks were used to evaluate arithmetic 
reasoning?,5 +https://arxiv.org/pdf/2201.11903,"Scientific Report",5 Symbolic Reasoning,Is symbolic reasoning usually simple for humans but challenging for language models?,YES +https://arxiv.org/pdf/2201.11903,"Scientific Report",5 Symbolic Reasoning,How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4,B +https://arxiv.org/pdf/2201.11903,"Scientific Report",5 Symbolic Reasoning,"Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles",A +https://arxiv.org/pdf/2201.11903,"Scientific Report",3.4 Robustness of Chain of Thought,How many annotators provided independent chains of thought?,3 +https://arxiv.org/pdf/2201.11903,"Scientific Report",3.2 Results,How many random samples were examined to understand model performance?,100 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",OVERVIEW AND GOAL,How many chapters does the game last?,3 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",OVERVIEW AND GOAL,How many victory conditions are there?,3 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",CARD AND TILE EFFECTS,How many different races are there?,6 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",CHAPTER OVERVIEW,Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other,A +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",CHAPTER OVERVIEW,Can you take a Chapter card and a Landmark tile on your same turn?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",CHAPTER OVERVIEW,How many goins does a player take when discarding a card during Chapter 3?,3 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",CHAPTER OVERVIEW,"After taking a landmark tile, do you reveal a new tile and the end of your turn?",NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",CARD AND TILE COSTS,Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",CARD AND TILE COSTS,"If a player is missing 2 skill symbols, how many coins must they pay to the reserve?",2 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",CARD AND TILE EFFECTS,Can you use a symbol more than once per turn?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",CARD AND TILE EFFECTS,Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue,B +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",CARD AND TILE EFFECTS,During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3,C +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",CONQUERING MIDDLE-EARTH,"If you place or move an unit and an enemy fortress is present, does it trigger a conflict?",NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",END OF THE GAME,Can the game end in a tie?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf,"Board Game",END OF THE GAME,In how many regions do you need to be present to win the game?,7 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",LOOKOUT PHASE,What is the maximum number of cards a player may acquire during the lookout phase?,4 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",LOOKOUT PHASE,Is there a limit to the number of cards a player may have in their hand?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",ACTION PHASE,"Can you raid the locations of a player that has passed during the action phase?",NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",ACTION PHASE,How many points in the scoreboard must be reached during the Action phase to trigger the final round?,25 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",EXPEDITION PHASE,Can players conquer and pillage the same island during the expedition phase?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",EXPEDITION PHASE,Do you need a fish to conquer a distant island?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",EXPEDITION PHASE,How many victory points you get from each conquered island?,1 
+https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",CLEANUP PHASE,Is there a cleanup phase in the final round?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",LOCATION ABILITIES,How many victory points are granted by a built Field Location card that work as an upgrade?,1 +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",LOCATION ABILITIES,Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",RAID,Can you use the raid action without a Raze token?,NO +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",GAME END,Can the game end in a tie?,YES +https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf,"Board Game",GAME END,"If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie",A +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,"Regulation",2.1. Toilets,"Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold",B +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,"Regulation",CARBON MONOXIDE DETECTION AND VENTING,"In which type of parkings must a carbon monoxide detector be installed? 
-A: indoor -B: underground -C: indoor or underground",C +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,"Regulation",4.1. Natural lighting,"What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%",A +https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf,"Regulation",1.2.1. Internal partitions and doors,"What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90",A +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",5.2. Thread Hierarchy,"What is the maximum number of threads within a thread block?",1024 +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",5.2. Thread Hierarchy,"Can you identify a thread with a four-dimensional index?",NO +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",6.1.1. Compilation Workflow,"In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.",B +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",6.1.1. Compilation Workflow,"What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.",B +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",6.1.1. Compilation Workflow,"What is the primary purpose of just-in-time (JIT) compilation? 
-A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.",C +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",6.1.1. Compilation Workflow,"What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.",A +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",15.3. API Fundamentals,"When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.",C +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",15.3. API Fundamentals,"What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.",A +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",15.3. API Fundamentals,"When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.",B +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",15.3. API Fundamentals,"How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.",C +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",23.1. What is Lazy Loading?,"What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.",A +https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf,"Techincal Documentation",23.1. What is Lazy Loading?,"Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?",NO +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,"Scientific Report",Risk Perception,"which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.",B +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,"Scientific Report",Risk Perception,"In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.",C +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,"Scientific Report",Training Cost,"What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.",A +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,"Scientific Report",Training Cost,"How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.",C +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,"Scientific Report",LLM Tokenization Introduces Unfairness,"What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.",A +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,"Scientific Report",LLM Tokenization Introduces Unfairness,"What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.",B +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,"Scientific Report",U.S. Regulation,"How many AI-related regulations were enacted in the United States in 2023?",25 +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,"Scientific Report",U.S. Regulation,"Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance",B +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,"Scientific Report",Europe,"Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. 
-C: United Kingdom",B +https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf,"Scientific Report",Europe,"Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.",C +https://arxiv.org/pdf/2302.13971,"Scientific Report","2.1 Pre-training Data","What proportion of the pre-training data was from Github? -A: 4.5% -B: 15.0% -C: 4%",A +https://arxiv.org/pdf/2302.13971,"Scientific Report","2.1 Pre-training Data","How many languages did the Wikipedia data cover?",20 +https://arxiv.org/pdf/2302.13971,"Scientific Report","2.3 Optimizer","What optimizer was used for training? -A: AdamW -B: Adam -C: SGD",A +https://arxiv.org/pdf/2302.13971,"Scientific Report","2.3 Optimizer","What value was used for the weight decay?",0.1 +https://arxiv.org/pdf/2302.13971,"Scientific Report","3 Main results","How many benchmarks were tested?",20 +https://arxiv.org/pdf/2302.13971,"Scientific Report","3 Main results","Was the model compared against GPT-4?",NO +https://arxiv.org/pdf/2302.13971,"Scientific Report","5 Bias, Toxicity and Misinformation","Can LLMs re-produce biases that exist in training data?",YES +https://arxiv.org/pdf/2302.13971,"Scientific Report","5 Bias, Toxicity and Misinformation","Do authors consider the evaluations enough to fully comprehend the risks of the model?",NO +https://arxiv.org/pdf/2302.13971,"Scientific Report","5.2 CrowS-Pairs","Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?",NO +https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Regulation","Limitations of generative AI and LLMs","Which of the following is not considered a limitation of the Large Language Models? 
-A: Hallucination -B: Explainability -C: Memorization",C +https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Regulation","Limitations of generative AI and LLMs","Can LLMs be used as an alternative to visiting a doctor?",NO +https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Regulation","Procurement in an emerging market","Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? -A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?",C +https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Regulation","Accountability and responsibility","what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.",B +https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf,"Regulation","Accountability and responsibility","What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.",B From 97049d67d83ec6129569d442bd365c7a5e490578 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 4 Feb 2025 14:53:59 +0100 Subject: [PATCH 111/120] Update gemini prompt and count --- benchmark/gemini_RAGatouille.ipynb | 1792 +++++----- benchmark/gemini_find_retrieve_answer.ipynb | 3248 ++++++++++++++----- benchmark/gemini_full_context.ipynb | 52 +- benchmark/gemini_perfect_context.ipynb | 97 +- 4 files changed, 3442 insertions(+), 1747 deletions(-) diff --git a/benchmark/gemini_RAGatouille.ipynb b/benchmark/gemini_RAGatouille.ipynb index 61f1cf4..c74d753 100644 --- a/benchmark/gemini_RAGatouille.ipynb +++ b/benchmark/gemini_RAGatouille.ipynb @@ -63,8 +63,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "GPU is available!\n" ] @@ -100,8 +100,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Collecting ragatouille\n", " Downloading ragatouille-0.0.8.post4-py3-none-any.whl.metadata (15 kB)\n", @@ -391,8 +391,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-fztvdq23\n", @@ -508,8 +508,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "--2025-01-29 09:01:27-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", @@ -678,28 +678,20 @@ "outputs": [], "source": [ "SYSTEM_PROMPT = \"\"\"\n", - "You are given an input document and a question.\n", - "You can only answer the question based on the information in the document.\n", - "You will return a JSON name with a single key: \"answer\".\n", - "In `\"answer\"`, you will return the answer using one of the following JSON types:\n", - "- Yes/No (for boolean questions)\n", - "Is the model an LLM?\n", - "{\n", - " \"answer\": \"No\"\n", - "}\n", - "- Single number (for numeric questions)\n", - "How many layers does the model have?\n", - "{\n", - " \"answer\": 12\n", - "}\n", + "You are a rigorous assistant answering questions.\n", + "You must only answer based on the current information available which is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "If the current information available not enough to answer the question,\n", + "you must return \"I need more info\" srting and nothing else:\n", + "\n", + "If the current information is enough to answer, you must return one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "- Number (for numeric questions)\n", "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model?\n", - "-A: ReLU\n", - "-B: Sigmoid\n", - "-C: Tanh\n", - "{\n", - " \"answer\": \"C\"\n", - "}\n", "\"\"\"" ] }, @@ -711,13 +703,7 @@ }, "outputs": [], "source": [ - "model = load_gemini_model(\n", - " \"gemini-2.0-flash-exp\",\n", - " system_prompt=SYSTEM_PROMPT,\n", - " generation_config={\n", - " \"response_mime_type\": \"application/json\",\n", - " },\n", - ")\n", + "model = load_gemini_model(\"gemini-2.0-flash-exp\", system_prompt=SYSTEM_PROMPT)\n", "model.n = 0" ] }, @@ -822,8 +808,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "\u001b[32m2025-01-29 09:01:52.976\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", "\u001b[32m2025-01-29 09:01:53.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", @@ -838,121 +824,121 @@ ] }, { - "output_type": "display_data", "data": { - "text/plain": [ - "artifact.metadata: 0%| | 0.00/1.63k [00:00\n", @@ -2405,13 +2357,47 @@ "
\n", "
\n" ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"results\",\n \"rows\": 7,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 18,\n \"min\": 22,\n \"max\": 83,\n \"num_unique_values\": 7,\n \"samples\": [\n 22,\n 44,\n 66\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Classification of general-purpose AI models as general-purpose AI models with systemic risk\",\n \"3.2 Results\",\n \"15.3. API Fundamentals\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? 
-A: 10^1 -B: 10^20 -C: 10^25\",\n \"How many random samples were examined to understand model performance?\",\n \"How many victory points you get from each conquered island?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"C\",\n \"100\",\n \"1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"NO SPECIFIC THRESHOLD IS MENTIONED IN THE DOCUMENT.\",\n \"50\",\n \"THE DOCUMENT SAYS THAT PLAYERS GAIN VPS FROM PILLAGING ISLANDS BUT NOT HOW MANY VICTORY POINTS EACH CONQUERED ISLAND GIVES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } + "text/plain": [ + " Unnamed: 0 document \\\n", + "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", + "44 44 https://arxiv.org/pdf/2201.11903 \n", + "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", + "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", + "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", + "66 66 https://github.com/mozilla-ai/structured-qa/re... \n", + "83 83 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "\n", + " section \\\n", + "22 Classification of general-purpose AI models as... \n", + "44 3.2 Results \n", + "47 CARD AND TILE EFFECTS \n", + "52 CARD AND TILE COSTS \n", + "55 CARD AND TILE EFFECTS \n", + "66 EXPEDITION PHASE \n", + "83 15.3. API Fundamentals \n", + "\n", + " question answer \\\n", + "22 What is the threshold, measured in floating po... C \n", + "44 How many random samples were examined to under... 
100 \n", + "47 How many different races are there? 6 \n", + "52 Can a player pay coins to compensate for missi... YES \n", + "55 Which type of cards provide coins? -A: Gray -B... B \n", + "66 How many victory points you get from each conq... 1 \n", + "83 When are virtual addresses assigned to graph a... C \n", + "\n", + " pred_answer pred_section \n", + "22 NO SPECIFIC THRESHOLD IS MENTIONED IN THE DOCU... NaN \n", + "44 50 NaN \n", + "47 7 NaN \n", + "52 NO NaN \n", + "55 NONE OF THE ABOVE NaN \n", + "66 THE DOCUMENT SAYS THAT PLAYERS GAIN VPS FROM P... NaN \n", + "83 A NaN " + ] }, + "execution_count": 13, "metadata": {}, - "execution_count": 13 + "output_type": "execute_result" } ], "source": [ @@ -2431,14 +2417,14 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "0.9292929292929293" ] }, + "execution_count": 14, "metadata": {}, - "execution_count": 14 + "output_type": "execute_result" } ], "source": [ @@ -2472,32 +2458,83 @@ }, "widgets": { "application/vnd.jupyter.widget-state+json": { - "20d67e8902244d87ad72120b9fb71284": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", + "004ad74940344b6eb376ae4cfc85f26b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_1e7fcaa0156d4f09af4bf24a8607e787", - "IPY_MODEL_0bad96f6403c4042a9ed7bb491c1b25d", - "IPY_MODEL_1c9e0ff5abab4e378959f47c5655f9f7" - ], - "layout": "IPY_MODEL_dd0ddf2594eb42b4babe6eeaf6a59bbb" - } - }, - 
"1e7fcaa0156d4f09af4bf24a8607e787": { + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "024598891b4f46299dc20b5cfd714e0c": { "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_45ce30572c20425691ebdabe0696b0ec", + "placeholder": "​", + "style": "IPY_MODEL_667241a7a4e6442b9e32450dbcbb0f56", + "value": "tokenizer.json: 100%" + } + }, + "066c98c9848e4e00b68d0e98ec6f3c1f": { + "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2509,16 +2546,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": 
"IPY_MODEL_5e532f20ae6d4a5c90d5beba1518d3ee", + "layout": "IPY_MODEL_c8939bbe84c24ff8ad43c8d996d29af2", "placeholder": "​", - "style": "IPY_MODEL_50215024305b41c38aec0a3808b3bc84", - "value": "artifact.metadata: 100%" + "style": "IPY_MODEL_9a8317a8c8754d4d8b513a7fb0366c8d", + "value": "config.json: 100%" } }, "0bad96f6403c4042a9ed7bb491c1b25d": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2539,83 +2576,26 @@ "value": 1633 } }, - "1c9e0ff5abab4e378959f47c5655f9f7": { + "0bebf69871bb4d04a5329ecb32d64b06": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e3084de2764a48089029ffafa1087e8a", - "placeholder": "​", - "style": "IPY_MODEL_420798f709e2420d81d7223c34ca442e", - "value": " 1.63k/1.63k [00:00<00:00, 72.1kB/s]" - } - }, - "dd0ddf2594eb42b4babe6eeaf6a59bbb": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - 
"grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "5e532f20ae6d4a5c90d5beba1518d3ee": { + "11c36278698f4a6e8f606811eaff2166": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2664,10 +2644,10 @@ "width": null } }, - "50215024305b41c38aec0a3808b3bc84": { + "13dd434100e747588f8be140f55305a3": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2679,78 +2659,82 @@ "description_width": "" } }, - "a42220b511f14cd8b89f5071c0d216a4": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "1601603b8da04598b2a3b1b6532b9de9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_name": "StyleView", + "description_width": "" } }, - "46097609bd4b46fa94c27a5dcfe98a1a": { + "167d14dc1f3b42fe9f4d9cc2ec341363": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "e3084de2764a48089029ffafa1087e8a": { + "1c9e0ff5abab4e378959f47c5655f9f7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e3084de2764a48089029ffafa1087e8a", + "placeholder": "​", + "style": "IPY_MODEL_420798f709e2420d81d7223c34ca442e", + "value": " 1.63k/1.63k [00:00<00:00, 72.1kB/s]" + } + }, + "1e7fcaa0156d4f09af4bf24a8607e787": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5e532f20ae6d4a5c90d5beba1518d3ee", + "placeholder": "​", + "style": "IPY_MODEL_50215024305b41c38aec0a3808b3bc84", + "value": "artifact.metadata: 100%" + } + }, + "1eda4198a078469dbba236c3ed8654c3": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2799,25 +2783,10 @@ "width": null } }, - "420798f709e2420d81d7223c34ca442e": { + "20d67e8902244d87ad72120b9fb71284": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8209dde69d4147739c522342bfedcccd": { - "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", - "model_module_version": "1.5.0", "state": { 
"_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2829,62 +2798,17 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_066c98c9848e4e00b68d0e98ec6f3c1f", - "IPY_MODEL_c88515f583bd469ca0d6ca54a812ca14", - "IPY_MODEL_a47e31ce610b4dcf8ac934ec11aefc65" + "IPY_MODEL_1e7fcaa0156d4f09af4bf24a8607e787", + "IPY_MODEL_0bad96f6403c4042a9ed7bb491c1b25d", + "IPY_MODEL_1c9e0ff5abab4e378959f47c5655f9f7" ], - "layout": "IPY_MODEL_38bd9b6cec8f42f1a9b2caca71478f4b" - } - }, - "066c98c9848e4e00b68d0e98ec6f3c1f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c8939bbe84c24ff8ad43c8d996d29af2", - "placeholder": "​", - "style": "IPY_MODEL_9a8317a8c8754d4d8b513a7fb0366c8d", - "value": "config.json: 100%" + "layout": "IPY_MODEL_dd0ddf2594eb42b4babe6eeaf6a59bbb" } }, - "c88515f583bd469ca0d6ca54a812ca14": { + "24ae74e4073749fba785b660dac48f4c": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ea0ed18363ec4a86b0383e0b43d38ac7", - "max": 743, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_dfb0d5f3c9ae46dc910d335a9215521a", - "value": 743 - } - }, - 
"a47e31ce610b4dcf8ac934ec11aefc65": { - "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2896,16 +2820,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_a2a6f8043e9943c7a6ec9112ac3d33bd", + "layout": "IPY_MODEL_944a78e6adaf4e3a87551d0bd5a6fc75", "placeholder": "​", - "style": "IPY_MODEL_8d18d3f17569471fade4a2df380a245c", - "value": " 743/743 [00:00<00:00, 64.1kB/s]" + "style": "IPY_MODEL_8d7d0da8d2344625aeef3d1c452a9c68", + "value": "special_tokens_map.json: 100%" } }, - "38bd9b6cec8f42f1a9b2caca71478f4b": { + "266e8497e8b04e3fad5d23391960ed13": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2954,10 +2878,10 @@ "width": null } }, - "c8939bbe84c24ff8ad43c8d996d29af2": { + "27ec9d176d11451bb049b62c278a86ff": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3006,25 +2930,48 @@ "width": null } }, - "9a8317a8c8754d4d8b513a7fb0366c8d": { + "29d523b694174b7596944eeb86a553d0": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_73d7ade0b58b41d1b1ac109026eeabc8", + 
"IPY_MODEL_cc1c0fcd84b94a199612c3e7ccd906cd", + "IPY_MODEL_5dbe5cc9d6e64e5cb62e7018a42e1f8e" + ], + "layout": "IPY_MODEL_56de5716ee0146158e399759aef55c41" + } + }, + "2a59d91e7621422ebda4fefca0ee6760": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "ea0ed18363ec4a86b0383e0b43d38ac7": { + "38bd9b6cec8f42f1a9b2caca71478f4b": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3068,31 +3015,15 @@ "overflow_y": null, "padding": null, "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "dfb0d5f3c9ae46dc910d335a9215521a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "top": null, + "visibility": null, + "width": null } }, - "a2a6f8043e9943c7a6ec9112ac3d33bd": { + "3919381f1ae247219c7e4378a5d2e1ff": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3141,25 +3072,10 @@ "width": null } }, - "8d18d3f17569471fade4a2df380a245c": { + 
"3bd3d79c0262467296061f64606e57ce": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "29d523b694174b7596944eeb86a553d0": { - "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3171,83 +3087,32 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_73d7ade0b58b41d1b1ac109026eeabc8", - "IPY_MODEL_cc1c0fcd84b94a199612c3e7ccd906cd", - "IPY_MODEL_5dbe5cc9d6e64e5cb62e7018a42e1f8e" + "IPY_MODEL_024598891b4f46299dc20b5cfd714e0c", + "IPY_MODEL_9846ac95a9864f6aad40bffcd1595c48", + "IPY_MODEL_f7e1a279ca7a4576a67d600c6e0fcad6" ], - "layout": "IPY_MODEL_56de5716ee0146158e399759aef55c41" - } - }, - "73d7ade0b58b41d1b1ac109026eeabc8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7f05356467fa4c2ab321004efa06e9c9", - "placeholder": "​", - "style": "IPY_MODEL_ed08c56e20194dbca6732642fb4af466", - "value": "model.safetensors: 100%" - } - }, - "cc1c0fcd84b94a199612c3e7ccd906cd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - 
"_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_266e8497e8b04e3fad5d23391960ed13", - "max": 438349816, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0bebf69871bb4d04a5329ecb32d64b06", - "value": 438349816 + "layout": "IPY_MODEL_1eda4198a078469dbba236c3ed8654c3" } }, - "5dbe5cc9d6e64e5cb62e7018a42e1f8e": { + "420798f709e2420d81d7223c34ca442e": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_be5d5dcca8cb498d8eb982b1cc1273fc", - "placeholder": "​", - "style": "IPY_MODEL_f6e97fcb881443beaec839bd64530d2d", - "value": " 438M/438M [00:02<00:00, 248MB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "56de5716ee0146158e399759aef55c41": { + "44b4d7daccdb46f19db7675c3a7d4f49": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3296,10 +3161,10 @@ "width": null } }, - "7f05356467fa4c2ab321004efa06e9c9": { + "45ce30572c20425691ebdabe0696b0ec": { "model_module": "@jupyter-widgets/base", - "model_name": 
"LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3348,25 +3213,26 @@ "width": null } }, - "ed08c56e20194dbca6732642fb4af466": { + "46097609bd4b46fa94c27a5dcfe98a1a": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "266e8497e8b04e3fad5d23391960ed13": { + "4e15263fae0140299c6a55ce95f7bd43": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3415,26 +3281,10 @@ "width": null } }, - "0bebf69871bb4d04a5329ecb32d64b06": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "be5d5dcca8cb498d8eb982b1cc1273fc": { + "4f1165cdc7ef4701889d0e6de6ac9ed1": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3483,10 +3333,10 @@ "width": null } }, - 
"f6e97fcb881443beaec839bd64530d2d": { + "50215024305b41c38aec0a3808b3bc84": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3498,10 +3348,10 @@ "description_width": "" } }, - "d1b166882cef441c816a75b784b3dcb0": { + "54af3da7793c404fa8b4e1062185ea68": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3513,83 +3363,69 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_5f145f7ffcd540149cd775f01e3da418", - "IPY_MODEL_6766b3d159fd4c29b853f3ad44616429", - "IPY_MODEL_e3ec24ca9f384b6e8a6b25f66c9a2872" + "IPY_MODEL_24ae74e4073749fba785b660dac48f4c", + "IPY_MODEL_895f37ac364f4c1aa4b3089fa286fca3", + "IPY_MODEL_f63e1751a94246888bf0426a2288cb36" ], - "layout": "IPY_MODEL_5b034562b2354e70a27bc06f5fe674cd" - } - }, - "5f145f7ffcd540149cd775f01e3da418": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f76cfc2d293d4b409e4fc8bfa805af96", - "placeholder": "​", - "style": "IPY_MODEL_167d14dc1f3b42fe9f4d9cc2ec341363", - "value": "tokenizer_config.json: 100%" - } - }, - "6766b3d159fd4c29b853f3ad44616429": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3919381f1ae247219c7e4378a5d2e1ff", - "max": 405, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2a59d91e7621422ebda4fefca0ee6760", - "value": 405 + "layout": "IPY_MODEL_90076a55ec674636b93c7b1d741ea374" } }, - "e3ec24ca9f384b6e8a6b25f66c9a2872": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", + "56de5716ee0146158e399759aef55c41": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d44706bfc8494edc8f266d3a94ff16a2", - "placeholder": "​", - "style": "IPY_MODEL_13dd434100e747588f8be140f55305a3", - "value": " 405/405 [00:00<00:00, 30.5kB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, "5b034562b2354e70a27bc06f5fe674cd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3638,10 +3474,31 @@ "width": null } }, - "f76cfc2d293d4b409e4fc8bfa805af96": { + "5dbe5cc9d6e64e5cb62e7018a42e1f8e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_be5d5dcca8cb498d8eb982b1cc1273fc", + "placeholder": "​", + "style": "IPY_MODEL_f6e97fcb881443beaec839bd64530d2d", + "value": " 438M/438M [00:02<00:00, 248MB/s]" + } + }, + "5e532f20ae6d4a5c90d5beba1518d3ee": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3690,10 +3547,31 @@ "width": null } }, - "167d14dc1f3b42fe9f4d9cc2ec341363": { + "5f145f7ffcd540149cd775f01e3da418": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f76cfc2d293d4b409e4fc8bfa805af96", + "placeholder": "​", + "style": "IPY_MODEL_167d14dc1f3b42fe9f4d9cc2ec341363", + "value": "tokenizer_config.json: 100%" + } + }, + "667241a7a4e6442b9e32450dbcbb0f56": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3705,10 +3583,100 @@ "description_width": "" } }, - "3919381f1ae247219c7e4378a5d2e1ff": { + "6766b3d159fd4c29b853f3ad44616429": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3919381f1ae247219c7e4378a5d2e1ff", + "max": 405, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2a59d91e7621422ebda4fefca0ee6760", + "value": 405 + } + }, + "67f985db0d7b41f7b15f135d6acb039e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_97d1348ebec44687ac2a9151d52b1e8f", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ae77818599cd4bc2ac761865e81c3f15", + "value": 231508 + } + }, + "6da62a5ad31940329f00748ad6eab4da": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_44b4d7daccdb46f19db7675c3a7d4f49", + "placeholder": "​", + "style": "IPY_MODEL_ee9dca0e0f2c49a49fb50b623818cda9", + "value": " 232k/232k [00:00<00:00, 1.77MB/s]" + } + }, + "73d7ade0b58b41d1b1ac109026eeabc8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7f05356467fa4c2ab321004efa06e9c9", + "placeholder": "​", + "style": "IPY_MODEL_ed08c56e20194dbca6732642fb4af466", + "value": "model.safetensors: 100%" + } + }, + "747558448b5e40038b270a6a6f6af6f0": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3757,26 +3725,31 @@ "width": null } }, - 
"2a59d91e7621422ebda4fefca0ee6760": { + "7d4aa0529fb74e81a08cc12aeb243456": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4e15263fae0140299c6a55ce95f7bd43", + "placeholder": "​", + "style": "IPY_MODEL_cacdb3a3a0e04ca3b744fb82a3dcc925", + "value": "vocab.txt: 100%" } }, - "d44706bfc8494edc8f266d3a94ff16a2": { + "7f05356467fa4c2ab321004efa06e9c9": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3825,25 +3798,10 @@ "width": null } }, - "13dd434100e747588f8be140f55305a3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, "81a8270d87ef4c9b80c46c5236c8292f": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3862,31 +3820,48 
@@ "layout": "IPY_MODEL_27ec9d176d11451bb049b62c278a86ff" } }, - "7d4aa0529fb74e81a08cc12aeb243456": { + "81ee5fe4f8044ab9819b9f767c41826e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8209dde69d4147739c522342bfedcccd": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4e15263fae0140299c6a55ce95f7bd43", - "placeholder": "​", - "style": "IPY_MODEL_cacdb3a3a0e04ca3b744fb82a3dcc925", - "value": "vocab.txt: 100%" + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_066c98c9848e4e00b68d0e98ec6f3c1f", + "IPY_MODEL_c88515f583bd469ca0d6ca54a812ca14", + "IPY_MODEL_a47e31ce610b4dcf8ac934ec11aefc65" + ], + "layout": "IPY_MODEL_38bd9b6cec8f42f1a9b2caca71478f4b" } }, - "67f985db0d7b41f7b15f135d6acb039e": { + "895f37ac364f4c1aa4b3089fa286fca3": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3899,91 +3874,48 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": 
"IPY_MODEL_97d1348ebec44687ac2a9151d52b1e8f", - "max": 231508, + "layout": "IPY_MODEL_747558448b5e40038b270a6a6f6af6f0", + "max": 112, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_ae77818599cd4bc2ac761865e81c3f15", - "value": 231508 + "style": "IPY_MODEL_81ee5fe4f8044ab9819b9f767c41826e", + "value": 112 + } + }, + "8d18d3f17569471fade4a2df380a245c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "6da62a5ad31940329f00748ad6eab4da": { + "8d7d0da8d2344625aeef3d1c452a9c68": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_44b4d7daccdb46f19db7675c3a7d4f49", - "placeholder": "​", - "style": "IPY_MODEL_ee9dca0e0f2c49a49fb50b623818cda9", - "value": " 232k/232k [00:00<00:00, 1.77MB/s]" - } - }, - "27ec9d176d11451bb049b62c278a86ff": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - 
"align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_name": "StyleView", + "description_width": "" } }, - "4e15263fae0140299c6a55ce95f7bd43": { + "90076a55ec674636b93c7b1d741ea374": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4032,25 +3964,10 @@ "width": null } }, - "cacdb3a3a0e04ca3b744fb82a3dcc925": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "97d1348ebec44687ac2a9151d52b1e8f": { + "944a78e6adaf4e3a87551d0bd5a6fc75": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": 
"1.2.0", @@ -4099,26 +4016,10 @@ "width": null } }, - "ae77818599cd4bc2ac761865e81c3f15": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "44b4d7daccdb46f19db7675c3a7d4f49": { + "97d1348ebec44687ac2a9151d52b1e8f": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4167,68 +4068,10 @@ "width": null } }, - "ee9dca0e0f2c49a49fb50b623818cda9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3bd3d79c0262467296061f64606e57ce": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_024598891b4f46299dc20b5cfd714e0c", - "IPY_MODEL_9846ac95a9864f6aad40bffcd1595c48", - "IPY_MODEL_f7e1a279ca7a4576a67d600c6e0fcad6" - ], - "layout": 
"IPY_MODEL_1eda4198a078469dbba236c3ed8654c3" - } - }, - "024598891b4f46299dc20b5cfd714e0c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_45ce30572c20425691ebdabe0696b0ec", - "placeholder": "​", - "style": "IPY_MODEL_667241a7a4e6442b9e32450dbcbb0f56", - "value": "tokenizer.json: 100%" - } - }, "9846ac95a9864f6aad40bffcd1595c48": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4249,31 +4092,25 @@ "value": 466081 } }, - "f7e1a279ca7a4576a67d600c6e0fcad6": { + "9a8317a8c8754d4d8b513a7fb0366c8d": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_004ad74940344b6eb376ae4cfc85f26b", - "placeholder": "​", - "style": "IPY_MODEL_cb69dbb882694ed3bab1a2b35e0df524", - "value": " 466k/466k [00:00<00:00, 3.50MB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "1eda4198a078469dbba236c3ed8654c3": { + 
"a2a6f8043e9943c7a6ec9112ac3d33bd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4322,10 +4159,10 @@ "width": null } }, - "45ce30572c20425691ebdabe0696b0ec": { + "a42220b511f14cd8b89f5071c0d216a4": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4374,25 +4211,47 @@ "width": null } }, - "667241a7a4e6442b9e32450dbcbb0f56": { + "a47e31ce610b4dcf8ac934ec11aefc65": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a2a6f8043e9943c7a6ec9112ac3d33bd", + "placeholder": "​", + "style": "IPY_MODEL_8d18d3f17569471fade4a2df380a245c", + "value": " 743/743 [00:00<00:00, 64.1kB/s]" + } + }, + "ae77818599cd4bc2ac761865e81c3f15": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "11c36278698f4a6e8f606811eaff2166": { + "be5d5dcca8cb498d8eb982b1cc1273fc": { 
"model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4441,26 +4300,34 @@ "width": null } }, - "c8a050cfb1164c1cbecb0a86bc555d9c": { + "c88515f583bd469ca0d6ca54a812ca14": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ea0ed18363ec4a86b0383e0b43d38ac7", + "max": 743, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_dfb0d5f3c9ae46dc910d335a9215521a", + "value": 743 } }, - "004ad74940344b6eb376ae4cfc85f26b": { + "c8939bbe84c24ff8ad43c8d996d29af2": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4509,68 +4376,56 @@ "width": null } }, - "cb69dbb882694ed3bab1a2b35e0df524": { + "c8a050cfb1164c1cbecb0a86bc555d9c": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": 
"ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "54af3da7793c404fa8b4e1062185ea68": { + "cacdb3a3a0e04ca3b744fb82a3dcc925": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_24ae74e4073749fba785b660dac48f4c", - "IPY_MODEL_895f37ac364f4c1aa4b3089fa286fca3", - "IPY_MODEL_f63e1751a94246888bf0426a2288cb36" - ], - "layout": "IPY_MODEL_90076a55ec674636b93c7b1d741ea374" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "24ae74e4073749fba785b660dac48f4c": { + "cb69dbb882694ed3bab1a2b35e0df524": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_944a78e6adaf4e3a87551d0bd5a6fc75", - "placeholder": "​", - "style": "IPY_MODEL_8d7d0da8d2344625aeef3d1c452a9c68", - "value": "special_tokens_map.json: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", 
+ "description_width": "" } }, - "895f37ac364f4c1aa4b3089fa286fca3": { + "cc1c0fcd84b94a199612c3e7ccd906cd": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4583,39 +4438,40 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_747558448b5e40038b270a6a6f6af6f0", - "max": 112, + "layout": "IPY_MODEL_266e8497e8b04e3fad5d23391960ed13", + "max": 438349816, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_81ee5fe4f8044ab9819b9f767c41826e", - "value": 112 + "style": "IPY_MODEL_0bebf69871bb4d04a5329ecb32d64b06", + "value": 438349816 } }, - "f63e1751a94246888bf0426a2288cb36": { + "d1b166882cef441c816a75b784b3dcb0": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4f1165cdc7ef4701889d0e6de6ac9ed1", - "placeholder": "​", - "style": "IPY_MODEL_1601603b8da04598b2a3b1b6532b9de9", - "value": " 112/112 [00:00<00:00, 8.30kB/s]" + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5f145f7ffcd540149cd775f01e3da418", + "IPY_MODEL_6766b3d159fd4c29b853f3ad44616429", + "IPY_MODEL_e3ec24ca9f384b6e8a6b25f66c9a2872" + ], + "layout": "IPY_MODEL_5b034562b2354e70a27bc06f5fe674cd" } }, - "90076a55ec674636b93c7b1d741ea374": { + "d44706bfc8494edc8f266d3a94ff16a2": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4664,10 +4520,10 @@ "width": null } }, - "944a78e6adaf4e3a87551d0bd5a6fc75": { + "dd0ddf2594eb42b4babe6eeaf6a59bbb": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4716,25 +4572,26 @@ "width": null } }, - "8d7d0da8d2344625aeef3d1c452a9c68": { + "dfb0d5f3c9ae46dc910d335a9215521a": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "747558448b5e40038b270a6a6f6af6f0": { + "e3084de2764a48089029ffafa1087e8a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4783,26 +4640,31 @@ "width": null } }, - "81ee5fe4f8044ab9819b9f767c41826e": { + "e3ec24ca9f384b6e8a6b25f66c9a2872": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - 
"description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d44706bfc8494edc8f266d3a94ff16a2", + "placeholder": "​", + "style": "IPY_MODEL_13dd434100e747588f8be140f55305a3", + "value": " 405/405 [00:00<00:00, 30.5kB/s]" } }, - "4f1165cdc7ef4701889d0e6de6ac9ed1": { + "ea0ed18363ec4a86b0383e0b43d38ac7": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4851,10 +4713,61 @@ "width": null } }, - "1601603b8da04598b2a3b1b6532b9de9": { + "ed08c56e20194dbca6732642fb4af466": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ee9dca0e0f2c49a49fb50b623818cda9": { "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f63e1751a94246888bf0426a2288cb36": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4f1165cdc7ef4701889d0e6de6ac9ed1", + "placeholder": "​", + "style": "IPY_MODEL_1601603b8da04598b2a3b1b6532b9de9", + "value": " 112/112 [00:00<00:00, 8.30kB/s]" + } + }, + "f6e97fcb881443beaec839bd64530d2d": { + "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -4865,6 +4778,79 @@ "_view_name": "StyleView", "description_width": "" } + }, + "f76cfc2d293d4b409e4fc8bfa805af96": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f7e1a279ca7a4576a67d600c6e0fcad6": { + 
"model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_004ad74940344b6eb376ae4cfc85f26b", + "placeholder": "​", + "style": "IPY_MODEL_cb69dbb882694ed3bab1a2b35e0df524", + "value": " 466k/466k [00:00<00:00, 3.50MB/s]" + } } } } diff --git a/benchmark/gemini_find_retrieve_answer.ipynb b/benchmark/gemini_find_retrieve_answer.ipynb index a97ac07..658881f 100644 --- a/benchmark/gemini_find_retrieve_answer.ipynb +++ b/benchmark/gemini_find_retrieve_answer.ipynb @@ -1,798 +1,2536 @@ { - "cells": [ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "9RKWbX7BHEgr" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PYuloevCHEgu" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pgYAsUQWHEgv" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EbFAX4heHEgv" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2HoyF-xbHEgv", + "outputId": "70e679cc-b6be-4c19-ca1f-1148c8f5e27a" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "9RKWbX7BHEgr" - }, - "source": [ - "# Structured Q&A" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", + " Cloning 
https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-q1o0cypa\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-q1o0cypa\n", + " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", + " Switched to a new branch '5-add-benchmark'\n", + " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit a02ffd7c45a36261597af3f00a2316d7e349d05b\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting fire (from structured-qa==0.3.3.dev109+ga02ffd7)\n", + " Downloading fire-0.7.0.tar.gz (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev109+ga02ffd7) (0.27.1)\n", + "Collecting loguru (from structured-qa==0.3.3.dev109+ga02ffd7)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev109+ga02ffd7) (2.10.6)\n", + "Collecting pymupdf4llm (from structured-qa==0.3.3.dev109+ga02ffd7)\n", + " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev109+ga02ffd7) (6.0.2)\n", + "Collecting rapidfuzz (from structured-qa==0.3.3.dev109+ga02ffd7)\n", + " Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", + "Collecting streamlit (from structured-qa==0.3.3.dev109+ga02ffd7)\n", + " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev109+ga02ffd7) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev109+ga02ffd7) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev109+ga02ffd7) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev109+ga02ffd7)\n", + " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7)\n", + " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7)\n", + " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (6.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (4.23.0)\n", + "Requirement already satisfied: 
narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (1.24.1)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2.18.0)\n", + "Requirement already 
satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (25.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (1.17.0)\n", + "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", + "Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m57.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m101.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m92.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m70.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", + " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev109+ga02ffd7-py3-none-any.whl size=13245 sha256=2f62af9114406a8689c20b7543363be36c644f43c872847130f9e4881dd31586\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-p89axgg2/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", + " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=b391db5f33e78d5df3c80cbcc1aa314fa35363c7b79f9301a83167bd7b2d5437\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + "Successfully built structured-qa fire\n", + "Installing collected packages: watchdog, rapidfuzz, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", + "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 rapidfuzz-3.12.1 streamlit-1.41.1 structured-qa-0.3.3.dev109+ga02ffd7 watchdog-6.0.0\n" + ] + } + ], + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "p_hsSGafHEgw", + "outputId": "0fc4f4af-f5b5-46bd-8a5c-ad2fb956c5d0" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "PYuloevCHEgu" - }, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "--2025-02-04 10:16:08-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 21441 (21K) [text/plain]\n", + "Saving to: ‘structured_qa.csv’\n", + "\n", + "structured_qa.csv 100%[===================>] 20.94K --.-KB/s in 0.001s \n", + "\n", + "2025-02-04 10:16:08 (13.8 MB/s) - ‘structured_qa.csv’ saved [21441/21441]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0MDfM6cyHEgx" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "5bLJE4U7HEgx" + }, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "from google.colab.userdata import get, SecretNotFoundError\n", + "\n", + "try:\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + "except SecretNotFoundError as e:\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "y3yUsRDWHEgy" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AgpODLeJHEgy" + }, + "source": [ + "## Function to Process a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "n6d8F7cYHEgy" + }, + "outputs": [], + "source": [ + "from structured_qa.config import FIND_PROMPT\n", + "from structured_qa.preprocessing import document_to_sections_dir\n", + "from structured_qa.workflow import find_retrieve_answer\n", + "\n", + "\n", + "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You must only answer based on the current information available which is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "If the current information available not enough to answer the 
question,\n", + "you must return \"I need more info\" srting and nothing else:\n", + "\n", + "If the current information is enough to answer, you must return one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "- Number (for numeric questions)\n", + "- Single letter (for multiple-choice questions)\n", + "\"\"\"\n", + "\n", + "\n", + "def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + " find_prompt: str = FIND_PROMPT,\n", + " answer_prompt: str = ANSWER_WITH_TYPE_PROMPT,\n", + "):\n", + " sections_dir = Path(\"sections\") / Path(document_file).stem\n", + " if not sections_dir.exists():\n", + " logger.info(\"Splitting document into sections\")\n", + " document_to_sections_dir(document_file, sections_dir)\n", + "\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " answer, sections_checked = find_retrieve_answer(\n", + " question, model, sections_dir, find_prompt, answer_prompt\n", + " )\n", + " logger.info(f\"Answer: {answer}\")\n", + " answers[index] = answer\n", + " sections[index] = sections_checked[-1] if sections_checked else None\n", + "\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GdlWjANdHEgz" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "9zx8nCaZHEgz" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "U4R84hHRHEgz" + }, + "outputs": [], + "source": [ + "model = load_gemini_model(\"gemini-2.0-flash-exp\", system_prompt=None)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BEzqJJ1yHEgz" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": 
"code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, + "id": "-qtPf9RmHEgz", + "outputId": "e03e8757-952c-45b7-9c19-b2e57cc46a2a" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "pgYAsUQWHEgv" - }, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] + "metadata": { + "tags": null + }, + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:16:14.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-02-04 10:16:14.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 10:16:14.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf to HAI_AI-Index-Report-2024.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 10:16:14.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:16:14.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting HAI_AI-Index-Report-2024.pdf.pdf\u001b[0m\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "EbFAX4heHEgv" - }, - "source": [ - "## Installing dependencies" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing HAI_AI-Index-Report-2024.pdf.pdf...\n", + "[ ] 
(0/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 2/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 3/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 4/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 5/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 6/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 7/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 8/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 
9/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 10/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 11/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 12/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 13/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 14/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 15/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 16/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 17/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 
18/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 19/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 20/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 21/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 22/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 23/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 24/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 25/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 26/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 
27/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 28/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 29/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 30/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 31/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 32/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 33/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 34/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 35/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 
36/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 37/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 38/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 39/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 40/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 41/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 42/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 43/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 44/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 
45/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 46/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 47/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 48/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 49/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 50/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 51/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 52/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 53/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 
54/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 55/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 56/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 57/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 58/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 59/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 60/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 61/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 62/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 
63/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 64/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 65/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 66/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 67/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 68/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 69/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 70/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 71/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 
72/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 73/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 74/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 75/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 76/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 77/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 78/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 79/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 80/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 
81/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 82/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 83/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 84/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 85/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 86/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 87/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 88/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 89/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 
90/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 91/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 92/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 93/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 94/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 95/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 96/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 97/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 98/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 
99/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (100/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (101/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (102/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (103/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (104/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (105/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (106/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (107/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] 
(108/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (109/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (110/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (111/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (112/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (113/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (114/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (115/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (116/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] 
(117/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (118/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (119/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (120/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (121/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (122/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (123/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (124/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (125/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] 
(126/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (127/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (128/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (129/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (130/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (131/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (132/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (133/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (134/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] 
(135/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (136/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (137/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (138/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (139/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (140/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (141/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (142/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (143/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] 
(144/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (145/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (146/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (147/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (148/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (149/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (150/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (151/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (152/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] 
(153/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (154/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (155/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (156/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (157/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (158/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (159/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (160/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (161/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] 
(162/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (163/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (164/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (165/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (166/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (167/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (168/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (169/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (170/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] 
(171/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (172/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (173/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (174/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (175/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (176/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (177/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (178/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (179/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] 
(180/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (181/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (182/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (183/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (184/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (185/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (186/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (187/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (188/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] 
(189/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (190/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (191/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (192/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (193/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (194/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (195/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (196/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (197/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] 
(198/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (199/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (200/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (201/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (202/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (203/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (204/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (205/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (206/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] 
(207/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (208/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (209/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (210/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (211/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (212/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (213/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (214/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (215/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] 
(216/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (217/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (218/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (219/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (220/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (221/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (222/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (223/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (224/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (225/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== 
] (226/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (227/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (228/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (229/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (230/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (231/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (232/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (233/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (234/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (235/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] 
(236/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (237/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (238/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (239/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (240/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (241/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (242/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (243/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (244/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (245/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] 
(246/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (247/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (248/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (249/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (250/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (251/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (252/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (253/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (254/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (255/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] 
(256/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (257/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (258/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (259/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (260/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (261/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (262/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (263/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (264/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (265/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] 
(266/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (267/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (268/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (269/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (270/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (271/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (272/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (273/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (274/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (275/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] 
(276/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (277/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (278/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (279/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (280/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (281/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (282/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (283/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (284/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (285/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] 
(286/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (287/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (288/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (289/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (290/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (291/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (292/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (293/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (294/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (295/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] 
(296/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (297/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (298/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (299/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (300/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (301/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (302/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (303/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (304/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (305/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] 
(306/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (307/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (308/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (309/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (310/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (311/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (312/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (313/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (314/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (315/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] 
(316/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (317/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (318/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (319/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (320/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (321/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (322/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (323/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (324/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (325/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] 
(326/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (327/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (328/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (329/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (330/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (331/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (332/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (333/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (334/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (335/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] 
(336/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (337/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (338/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (339/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (340/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (341/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (342/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (343/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (344/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (345/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] 
(346/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (347/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (348/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (349/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (350/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (351/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (352/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (353/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (354/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (355/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] 
(356/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (357/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (358/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (359/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (360/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (361/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (362/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (363/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (364/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (365/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] 
(366/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (367/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (368/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (369/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (370/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (371/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (372/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (373/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (374/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (375/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] 
(376/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (377/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (378/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (379/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (380/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (381/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (382/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (383/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (384/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (385/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] 
(386/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (387/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (388/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (389/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (390/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (391/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (392/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (393/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (394/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (395/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] 
(396/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (397/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (398/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (399/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (400/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (401/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (402/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (403/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (404/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (405/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] 
(406/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (407/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (408/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (409/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (410/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (411/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (412/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (413/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (414/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (415/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] 
(416/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (417/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (418/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (419/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (420/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (421/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (422/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (423/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (424/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (425/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] 
(426/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (427/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (428/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (429/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (430/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (431/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (432/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (433/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (434/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (435/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] 
(436/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (437/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (438/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (439/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (440/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (441/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (442/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (443/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (444/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (445/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] 
(446/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (447/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (448/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (449/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (450/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (451/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (452/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (453/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (454/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (455/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (456/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] 
(457/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (458/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (459/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (460/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (461/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (462/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (463/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (464/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (465/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (466/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (467/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] 
(468/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (469/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (470/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (471/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (472/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (473/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (474/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (475/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (476/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (477/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (478/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] 
(479/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (480/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (481/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (482/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (483/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (484/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (485/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (486/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (487/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (488/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (489/502)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] 
(490/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (491/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (492/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (493/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (494/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (495/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (496/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (497/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (498/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (499/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (500/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] 
(501/502)\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (502/502)\b\b\b\b\b\b\b\b\b\b\b" + ] }, { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "2HoyF-xbHEgv", - "outputId": "ba13b9dc-18c6-4ed1-f82b-8a8a71445bd6", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-kwo0xd9n\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-kwo0xd9n\n", - " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", - " Switched to a new branch '5-add-benchmark'\n", - " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 82f37f304be63e8096b40317eace08ff70ff0891\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev108+g82f37f3) (0.7.0)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev108+g82f37f3) (0.27.1)\n", - "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev108+g82f37f3) (0.7.3)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev108+g82f37f3) (2.10.6)\n", - "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev108+g82f37f3) (0.0.17)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev108+g82f37f3) (6.0.2)\n", - "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev108+g82f37f3) (3.12.1)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev108+g82f37f3) (1.41.1)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev108+g82f37f3) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev108+g82f37f3) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev108+g82f37f3) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev108+g82f37f3) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev108+g82f37f3) (2.32.3)\n", - "Requirement 
already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev108+g82f37f3) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev108+g82f37f3) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev108+g82f37f3) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev108+g82f37f3) (2.27.2)\n", - "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev108+g82f37f3) (1.25.2)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (11.1.0)\n", - "Requirement already satisfied: 
protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (0.10.2)\n", - "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (6.0.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (3.1.44)\n", - "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (0.9.1)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev108+g82f37f3) (6.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (1.24.1)\n", - "Requirement already satisfied: 
gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev108+g82f37f3) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev108+g82f37f3) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev108+g82f37f3) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev108+g82f37f3) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (5.0.2)\n", - 
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (25.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev108+g82f37f3) (1.17.0)\n", - "Building wheels for collected packages: structured-qa\n", - " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev108+g82f37f3-py3-none-any.whl size=13250 sha256=6563d68f35d99a2e5f1a8b9e0bff9eceae7a72c27b99dba8e6188901f5c1352b\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-09itl4q3/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", - "Successfully built structured-qa\n", - "Installing collected packages: structured-qa\n", - " Attempting uninstall: structured-qa\n", - " Found existing installation: structured-qa 0.3.3.dev107+g4ea56e2\n", - " Uninstalling structured-qa-0.3.3.dev107+g4ea56e2:\n", - " Successfully uninstalled structured-qa-0.3.3.dev107+g4ea56e2\n", - "Successfully installed structured-qa-0.3.3.dev108+g82f37f3\n" - ] - } - ], - "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:21:38.611\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:38.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:38.763\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 84 sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:38.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:38.780\u001b[0m | \u001b[32m\u001b[1mSUCCESS 
\u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:38.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:38.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:38.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 0\u001b[0m\n" + ] }, { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "p_hsSGafHEgw", - "outputId": "978e3aa8-8a08-46dd-b573-1fe9136aa4df", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2025-02-03 16:58:08-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 21441 (21K) [text/plain]\n", - "Saving to: ‘structured_qa.csv.1’\n", - "\n", - "\rstructured_qa.csv.1 0%[ ] 0 --.-KB/s \rstructured_qa.csv.1 100%[===================>] 20.94K --.-KB/s in 0s \n", - "\n", - "2025-02-03 16:58:08 (122 MB/s) - ‘structured_qa.csv.1’ saved [21441/21441]\n", - "\n" - ] - } - ], - "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "]\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "0MDfM6cyHEgx" - }, - "source": [ - "# Setup" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:21:51.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:52.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:53.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:55.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:55.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:55.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:56.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:57.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:57.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. 
-C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:57.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:58.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:21:59.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:22:00.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:22:00.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:01.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:02.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:04.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:05.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - 
\u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:06.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:06.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:06.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:07.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:08.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:09.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:10.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:10.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI 
related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:10.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:23:10.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:18.584\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:20.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:20.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:20.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:21.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:22.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:23.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:25.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:25.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:25.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:26.466\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:27.766\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 25\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:27.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:27.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:28.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:24:28.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:25:30.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 10:25:30.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. 
-C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-02-04 10:25:30.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:25:31.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:25:32.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:25:34.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:25:35.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:25:36.180\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:25:37.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:25:38.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:25:40.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 
10:25:40.308\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:26:41.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:26:42.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:26:45.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:26:46.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:26:47.736\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:26:49.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:26:50.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:26:51.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:26:52.223\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:26:52.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:27:53.955\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:27:54.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:27:56.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:27:57.379\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:27:58.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:27:59.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:28:00.796\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:28:01.943\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:28:03.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:28:03.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:04.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:05.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:07.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:08.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:09.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:11.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:11.045\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:11.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:12.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:13.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:14.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:29:14.339\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:15.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:17.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:18.263\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:19.308\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:20.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:21.475\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:22.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:24.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:25.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:25.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:25.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:25.316\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:25.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 1706.03762.pdf\u001b[0m\n" + ] }, { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "5bLJE4U7HEgx" - }, - "outputs": [], - "source": [ - "import os\n", - "import google.generativeai as genai\n", - "from google.colab.userdata import get, SecretNotFoundError\n", - "\n", - "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", - "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing 1706.03762.pdf...\n", + "[ ] (0/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 1/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 2/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 3/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 
4/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 5/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 6/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 7/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 8/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 9/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (10/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (11/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (12/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (13/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] 
(14/15)\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (15/15)\b\b\b\b\b\b\b\b\b" + ] }, { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "y3yUsRDWHEgy" - }, - "outputs": [], - "source": [ - "from loguru import logger" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:30:30.455\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:30.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:30.466\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 12 sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:30.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/1706.03762\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:30.474\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:30.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:30.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - 
\u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:30.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:30:30.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "AgpODLeJHEgy" - }, - "source": [ - "## Function to Process a single Document" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "]\n" + ] }, { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "n6d8F7cYHEgy" - }, - "outputs": [], - "source": [ - "from structured_qa.config import FIND_PROMPT\n", - "from structured_qa.preprocessing import document_to_sections_dir\n", - "from structured_qa.workflow import find_retrieve_answer\n", - "\n", - "\n", - "\n", - "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", - "You are a rigorous assistant answering questions.\n", - "You must only answer based on the current information available which is:\n", - "\n", - "```\n", - "{CURRENT_INFO}\n", - "```\n", - "\n", - "If the current information available not enough to answer the question,\n", - "you must return \"I need more info\" srting and nothing else:\n", - "\n", - "If the current information is enough to answer, you must return one of the following formats:\n", - "- YES/NO (for boolean questions)\n", - "- Number (for numeric questions)\n", - "- Single letter (for multiple-choice questions)\n", - "\"\"\"\n", - "\n", - "\n", - "def process_document(\n", - " document_file,\n", - " document_data,\n", - " model,\n", - " find_prompt: str = FIND_PROMPT,\n", - " answer_prompt: str = ANSWER_WITH_TYPE_PROMPT,\n", - "):\n", - " sections_dir = 
Path(\"sections\") / Path(document_file).stem\n", - " if not sections_dir.exists():\n", - " logger.info(\"Splitting document into sections\")\n", - " document_to_sections_dir(document_file, sections_dir)\n", - "\n", - " logger.info(\"Predicting\")\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in document_data.iterrows():\n", - " question = row[\"question\"]\n", - " logger.info(f\"Question: {question}\")\n", - " answer, sections_checked = find_retrieve_answer(\n", - " question, model, sections_dir, find_prompt, answer_prompt\n", - " )\n", - " logger.info(f\"Answer: {answer}\")\n", - " answers[index] = answer\n", - " sections[index] = sections_checked[-1] if sections_checked else None\n", - "\n", - " return answers, sections" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:31:31.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:33.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:33.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:33.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:35.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:36.412\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:36.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:36.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:37.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:39.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:39.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:39.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:40.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:42.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:42.050\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:42.053\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:43.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:31:43.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:45.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:45.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:45.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:47.415\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:48.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:48.993\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:48.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:50.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:52.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:52.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:52.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:53.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:54.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:54.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:54.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:56.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:57.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:57.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:57.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:32:57.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:33:58.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:00.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 4000\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:00.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base 
model?\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:00.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:01.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:02.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:04.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:05.498\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:07.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:08.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 0.1\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:08.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:08.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", + 
"\u001b[32m2025-02-04 10:34:08.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:34:08.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 2106.09685.pdf.pdf\u001b[0m\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "GdlWjANdHEgz" - }, - "source": [ - "## Load Model" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing 2106.09685.pdf.pdf...\n", + "[ ] (0/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 1/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 2/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 3/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 4/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 5/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 
6/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 7/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 8/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 9/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (10/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (11/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (12/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (13/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (14/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (15/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] 
(16/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (17/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (18/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (19/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (20/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (21/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (22/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (23/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (24/26)\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (25/26)\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (26/26)\b\b\b\b\b\b\b\b\b" + ] }, { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "9zx8nCaZHEgz" - }, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import 
load_gemini_model" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:35:27.059\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 10:35:27.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:35:27.073\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 22 sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:35:27.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 10:35:27.080\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 10:35:27.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 10:35:27.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-02-04 10:35:27.087\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n" + ] }, { - "cell_type": 
"code", - "execution_count": 7, - "metadata": { - "id": "U4R84hHRHEgz" - }, - "outputs": [], - "source": [ - "model = load_gemini_model(\"gemini-2.0-flash-exp\", system_prompt=None)" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "]\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "BEzqJJ1yHEgz" - }, - "source": [ - "# Run Benchmark" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:35:28.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:35:28.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:29.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:30.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:31.730\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:31.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:31.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:33.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:34.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:35.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:36.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:36.801\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:36.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:37.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:39.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 
9\u001b[0m\n", + "\u001b[32m2025-02-04 10:36:39.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:40.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:41.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:42.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:44.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:44.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:44.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:45.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:46.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 
6\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:47.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:48.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:49.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:37:49.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:38:50.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:38:51.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:38:52.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:38:53.828\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:38:54.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:38:54.830\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-02-04 10:38:55.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 10:38:55.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:38:55.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 2201.11903.pdf\u001b[0m\n" + ] }, { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "-qtPf9RmHEgz", - "outputId": "4930db62-5bc8-46ab-c51a-c4eca0ecd0d1", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\u001b[32m2025-02-03 16:58:11.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:11.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:11.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:11.804\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:11.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:11.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 0\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:19.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:20.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:22.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:24.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:24.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:24.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:26.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:28.475\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:28.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. 
-C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:28.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:29.728\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:31.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:32.804\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 16:58:32.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:34.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:36.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:38.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:39.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - 
\u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:41.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:42.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:45.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:46.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:48.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:48.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:48.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 16:59:48.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:00:49.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:00:51.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:00:52.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:00:56.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:00:58.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:01:00.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:01:00.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: 
What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-02-03 17:01:00.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:01:03.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:01:06.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:01:06.872\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-02-03 17:01:06.876\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:01:08.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:01:08.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:09.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:09.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:09.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:11.232\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:13.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 25\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:13.113\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:13.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:14.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:17.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:17.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. 
-C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:17.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:18.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:19.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:22.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:23.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:02:23.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:03:24.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:03:28.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:03:30.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - 
"\u001b[32m2025-02-03 17:03:32.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:03:33.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:03:35.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:03:35.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-02-03 17:03:35.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:03:37.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:03:38.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:03:39.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:03:39.828\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:41.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:42.804\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:44.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:46.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", 
- "\u001b[32m2025-02-03 17:04:48.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:49.662\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:51.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:53.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:55.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:55.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:55.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 1706.03762.pdf already exists\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:55.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:55.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:55.363\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:04:55.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:05:56.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:05:58.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:05:58.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-02-03 17:05:58.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:04.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:06.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:06.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers compose the 
decoder?\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:06.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:07.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:09.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:09.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:09.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:12.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:13.876\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:13.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:13.880\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 
8\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:15.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:06:15.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:16.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:16.509\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:16.512\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:17.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:19.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:19.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:19.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - 
"\u001b[32m2025-02-03 17:07:20.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:21.886\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:21.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:21.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:23.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:24.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:24.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:24.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:25.660\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:27.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:27.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:27.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:07:27.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:28.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:29.786\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 4000\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:29.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base 
model?\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:29.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:31.215\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:32.412\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 0.1\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:32.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:32.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2106.09685.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:32.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:32.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:32.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:34.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 
17:08:35.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:37.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:38.381\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:39.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:08:39.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:42.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:42.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:42.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:43.872\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:45.375\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:47.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:49.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:54.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:55.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:57.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:58.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 
17:09:58.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:58.654\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:09:58.656\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:00.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:01.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:03.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:04.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 175\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:04.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:04.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 
4\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:06.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:07.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:09.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:10.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:11:13.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:15.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:17.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:18.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 
17:12:20.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:21.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:22.876\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:24.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:24.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:24.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2201.11903.pdf already exists\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:24.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:24.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:24.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:25.814\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:27.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:27.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:27.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:12:27.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:28.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:42.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 5\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:42.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:42.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:47.016\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:52.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 5\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:52.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:52.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:54.572\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:56.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:56.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:56.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:57.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:59.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:59.254\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-02-03 17:13:59.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:14:00.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:14:00.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:13.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:13.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - 
\u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:13.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:16.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:23.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 2\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:23.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:23.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:29.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:35.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:36.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:39.653\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:41.178\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:43.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:15:43.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:05.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "ERROR:tornado.access:503 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 36505.75ms\n", - "\u001b[32m2025-02-03 17:17:44.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 100\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:44.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:44.078\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2210.05189.pdf already exists\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:44.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - 
"\u001b[32m2025-02-03 17:17:44.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:44.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:45.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:47.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:47.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:47.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:50.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:51.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - 
"\u001b[32m2025-02-03 17:17:52.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:54.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:17:54.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "ERROR:tornado.access:503 POST /v1beta/models/gemini-2.0-flash-exp:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 1695.42ms\n", - "\u001b[32m2025-02-03 17:18:57.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:18:58.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:19:00.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:19:01.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:19:03.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:19:05.565\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:19:06.811\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:19:08.033\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:19:09.558\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:19:09.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:10.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:12.003\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:13.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:15.181\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:16.504\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:17.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:19.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:21.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:22.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:22.886\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:22.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:20:22.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:24.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:25.536\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:27.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:28.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:31.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:32.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:33.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:36.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:36.232\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:36.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:38.012\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:21:38.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:22:39.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:22:52.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:22:55.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:22:56.577\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:23:09.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 14\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:23:09.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-02-03 17:23:09.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:23:11.788\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:23:17.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:23:19.562\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:23:22.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:23:22.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:23.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:24.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:26.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:31.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:31.714\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:31.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:34.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:35.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:36.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:38.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:39.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:24:39.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:41.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 
17:25:41.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2302.13971\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:41.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile 2302.13971.pdf already exists\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:41.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:41.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? -A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:41.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:42.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:43.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:45.053\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:46.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:47.544\u001b[0m 
| \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:49.728\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:51.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:52.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:25:52.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 17:26:53.797\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-03 17:26:55.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-03 17:26:56.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-03 17:26:58.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-03 17:26:59.389\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-03 17:27:00.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-03 17:27:00.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", - "\u001b[32m2025-02-03 17:27:00.716\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-03 17:27:02.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-03 17:27:03.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-03 17:27:05.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-03 17:27:05.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n" - ] - }, - { - "output_type": "error", - "ename": "KeyboardInterrupt", - "evalue": "", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in 
\u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"File {downloaded_document} already exists\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0manswers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msections\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprocess_document\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdownloaded_document\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdocument_data\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdocument_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mprocess_document\u001b[0;34m(document_file, document_data, model, find_prompt, answer_prompt)\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[0mquestion\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"question\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Question: {question}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 43\u001b[0;31m answer, sections_checked = find_retrieve_answer(\n\u001b[0m\u001b[1;32m 44\u001b[0m \u001b[0mquestion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msections_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfind_prompt\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0manswer_prompt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 45\u001b[0m )\n", - "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/structured_qa/workflow.py\u001b[0m in \u001b[0;36mfind_retrieve_answer\u001b[0;34m(question, model, sections_dir, find_prompt, answer_prompt, max_sections_to_check)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 88\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessages\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 89\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Failed to generate completion: {e}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/structured_qa/model_loaders.py\u001b[0m in \u001b[0;36mget_response\u001b[0;34m(self, messages)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcurrent_calls\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m9\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Waiting for 60 seconds\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m 
\u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 101\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcurrent_calls\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenerate_content\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstacked_message\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for document_link, document_data in data.groupby(\"document\"):\n", - " logger.info(f\"Downloading document {document_link}\")\n", - " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " if not Path(downloaded_document).exists():\n", - " urlretrieve(document_link, downloaded_document)\n", - " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", - " else:\n", - " logger.info(f\"File {downloaded_document} already exists\")\n", - "\n", - " answers, sections = process_document(downloaded_document, document_data, model)\n", - "\n", - " for index in document_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing 
2201.11903.pdf...\n", + "[ ] (0/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 2/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 3/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 4/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 5/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 6/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 7/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 8/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 
9/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (10/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (11/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (12/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (13/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (14/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (15/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (16/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (17/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (18/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] 
(19/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (20/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (21/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (22/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (23/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (24/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (25/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (26/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (27/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (28/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] 
(29/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (30/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (31/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (32/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (33/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (34/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (35/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (36/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (37/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (38/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (39/43)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] 
(40/43)\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (41/43)\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (42/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (43/43)\b\b\b\b\b\b\b\b\b" + ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3eW9TIKjHEgz" - }, - "outputs": [], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "for index, result in results.iterrows():\n", - " results.loc[index, \"pred_answer\"] = result[\"pred_answer\"].strip()\n", - " if result[\"pred_answer\"].startswith(\n", - " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", - " ):\n", - " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:39:55.481\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 10:39:55.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:39:55.500\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 21 sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:39:55.503\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/2201.11903\u001b[0m\n", + "\u001b[32m2025-02-04 10:39:55.510\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 10:39:55.512\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 10:39:55.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-02-04 10:39:55.518\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n" + ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "AhenESELHEgz" - }, - "outputs": [], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "]\n" + ] }, { - "cell_type": "code", - "source": [], - "metadata": { - "id": "-acmSBPMvo1w" - }, - "execution_count": null, - "outputs": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:39:56.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:39:57.887\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:39:57.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-02-04 10:39:57.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:39:58.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:40:00.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:40:00.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:01.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:02.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:03.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:04.366\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:05.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:06.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:07.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:09.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:09.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:09.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:10.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:41:10.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:11.756\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 5\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:11.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:11.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:13.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:14.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:14.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:14.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:15.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:17.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Based on the provided information (Table 13), the model has seen two examples for the letter concatenation task: \"Waldo Schmidt\" and \"Daniel Friedman\". Thus, the model has seen 2 example names.\n", + "So the answer is B.\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:17.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:17.010\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:17.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:19.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:19.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:19.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:20.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:21.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 2\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:21.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:21.530\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:42:21.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:43:22.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:43:24.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:43:25.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:43:26.087\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:43:27.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:43:28.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:43:29.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:43:30.470\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:43:31.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:43:31.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:44:32.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:44:33.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:44:34.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:44:35.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:44:36.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:44:38.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:44:39.388\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:44:40.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:44:41.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:44:41.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:45:42.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:45:43.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:45:44.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:45:45.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:45:46.507\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:45:47.833\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:45:49.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:45:50.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:45:51.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:45:51.704\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:46:52.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:46:54.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:46:55.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:46:56.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:46:57.640\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:46:58.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:47:00.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:47:01.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:47:02.553\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:47:02.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:03.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:04.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:05.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:05.901\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:06.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:06.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:06.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 2210.05189.pdf\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing 2210.05189.pdf...\n", + "[ ] (0/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] (1/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (2/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (3/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (4/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] 
(5/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (6/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (7/8)\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (8/8)\b\b\b\b\b\b\b" + ] }, - "language_info": { - "name": "python", - "version": "3.10.12" + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:48:08.615\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:08.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:08.625\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 12 sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:08.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/2210.05189\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:08.631\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:08.636\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:08.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:08.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "]\n" + ] }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:48:09.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:10.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:10.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:10.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:11.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:12.957\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:13.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:14.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:48:14.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:49:16.169\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:49:17.189\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:49:18.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:49:19.204\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:49:20.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:49:21.118\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:49:22.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:49:23.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:49:23.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:49:23.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:50:24.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:50:26.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:50:26.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:50:27.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:50:28.872\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:50:29.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:50:30.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:50:31.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:50:32.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:50:32.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:33.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:33.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:33.725\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:34.773\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:35.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:36.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:37.811\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:38.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:39.801\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:40.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:41.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:51:41.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:52:43.016\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:52:43.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:52:44.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:52:45.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:52:46.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:52:48.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:52:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:52:49.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:52:51.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:52:51.003\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:53:52.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:53:53.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:53:54.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:53:55.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:53:56.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:53:57.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", + "\u001b[32m2025-02-04 10:53:57.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-02-04 10:53:57.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:53:58.400\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:53:59.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:54:00.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:54:00.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:01.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:02.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:03.880\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 14\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:03.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:03.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:04.931\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:05.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:06.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:07.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:08.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:10.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:10.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:10.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:55:10.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:11.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:12.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:13.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:14.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:15.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:16.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:16.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document 
https://arxiv.org/pdf/2302.13971\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:16.363\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:16.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:16.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 2302.13971.pdf\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing 2302.13971.pdf...\n", + "[ ] (0/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 1/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 2/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 3/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 4/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 
5/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 6/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 7/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 8/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 9/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (10/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (11/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (12/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (13/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (14/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] 
(15/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (16/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (17/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (18/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (19/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (20/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (21/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (22/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (23/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (24/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (25/27)\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] 
(26/27)\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (27/27)" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:56:30.134\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:30.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:30.151\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 17 sections\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:30.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/2302.13971\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:30.160\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:30.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:30.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\b\b\b\b\b\b\b\b\b]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 10:56:30.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:31.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:32.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:33.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:56:33.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:57:34.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:57:35.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:57:36.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:57:37.262\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:57:38.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:57:39.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:57:40.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:57:41.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:57:42.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:57:42.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:58:43.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:58:44.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:58:45.223\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:58:46.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:58:47.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:58:48.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:58:49.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 10:58:50.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 10:58:51.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 10:58:51.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 10:59:52.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 10:59:52.819\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", + "\u001b[32m2025-02-04 10:59:52.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 10:59:53.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 10:59:54.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 10:59:55.860\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 10:59:56.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 10:59:57.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 10:59:59.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:00:00.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:00:01.111\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:00:01.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:01:02.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:01:03.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:01:04.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:01:05.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:01:06.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:01:07.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:01:08.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:01:09.271\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:01:10.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:01:10.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:11.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:12.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:13.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:14.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:15.725\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 20\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:15.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:15.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:16.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:17.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:17.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:17.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:18.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:20.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 0.1\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:20.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", + "\u001b[32m2025-02-04 11:02:20.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 
11:02:20.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:21.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:22.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 20\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:22.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:22.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:23.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:24.618\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:25.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:26.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:27.654\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:28.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:29.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:03:29.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:31.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:32.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:33.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:34.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:35.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:36.242\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:37.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:38.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:39.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:39.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:39.204\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:04:39.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:40.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:41.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:41.274\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:41.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:42.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:43.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:43.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:43.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:44.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:45.618\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:45.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document 
https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:45.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:45.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:05:45.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf...\n", + "[ ] (0/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 2/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 3/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 
4/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 5/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 6/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 7/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 8/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 9/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] (10/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] (11/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] (12/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] 
(13/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (14/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (15/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (16/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (17/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (18/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (19/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (20/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (21/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (22/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] 
(23/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (24/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (25/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (26/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (27/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (28/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (29/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (30/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (31/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (32/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] 
(33/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (34/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (35/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (36/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (37/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (38/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (39/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (40/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (41/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (42/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] 
(43/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (44/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (45/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (46/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (47/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (48/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (49/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (50/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (51/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (52/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] 
(53/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (54/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (55/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (56/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (57/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (58/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (59/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (60/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (61/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (62/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (63/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] 
(64/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (65/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (66/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (67/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (68/74)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (69/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (70/74)\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (71/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (72/74)\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (73/74)\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (74/74)" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 11:06:05.243\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - 
\u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 11:06:05.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:06:05.272\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 26 sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:06:05.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:06:05.281\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 11:06:05.283\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 11:06:05.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? 
-A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", + "\u001b[32m2025-02-04 11:06:05.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\b\b\b\b\b\b\b\b\b]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 11:06:06.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:06:07.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:06:08.351\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:06:08.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:09.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:09.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:09.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + 
"\u001b[32m2025-02-04 11:07:11.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:12.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:12.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? -A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:12.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:13.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:15.254\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:16.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:17.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:17.700\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:17.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:18.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:20.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:20.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:20.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:07:20.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:21.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:22.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:23.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:25.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:25.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:25.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to 
Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:25.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:25.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf...\n", + "[ ] (0/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 2/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 3/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 4/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 
5/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 6/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 7/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 8/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 9/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 10/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 11/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 12/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 13/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 
14/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 15/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 16/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 17/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 18/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 19/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 20/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 21/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 22/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 
23/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 24/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 25/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 26/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 27/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 28/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 29/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 30/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 31/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 
32/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 33/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 34/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 35/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 36/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 37/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 38/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 39/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 40/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 
41/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 42/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 43/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 44/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 45/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 46/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 47/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 48/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 49/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 
50/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 51/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 52/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 53/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 54/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 55/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 56/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 57/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 58/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 
59/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 60/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 61/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 62/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 63/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 64/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 65/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 66/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 67/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 68/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 
69/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 70/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 71/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 72/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 73/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 74/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 75/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 76/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 77/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 78/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 
79/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 80/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 81/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 82/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 83/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 84/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 85/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 86/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 87/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 88/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 
89/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 90/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 91/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 92/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 93/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 94/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 95/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 96/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 97/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] ( 98/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] ( 
99/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (100/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (101/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (102/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (103/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (104/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (105/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (106/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (107/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (108/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] 
(109/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (110/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (111/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (112/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (113/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (114/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (115/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (116/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (117/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (118/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] 
(119/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (120/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (121/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (122/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (123/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (124/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (125/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (126/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (127/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (128/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] 
(129/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (130/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (131/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (132/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (133/140)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (134/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (135/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (136/140)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (137/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (138/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (139/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] 
(140/140)\b\b\b\b\b\b\b\b\b\b\b" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 11:08:38.611\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:38.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 11:08:38.650\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 40 sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:38.654\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:38.666\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:38.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:38.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open 
access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:38.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:40.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:41.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:42.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:44.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:44.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? 
-A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:44.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:45.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:08:45.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:47.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:48.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:49.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:49.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:49.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:50.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent 
calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:51.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:51.726\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:51.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:52.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:53.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:53.872\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:53.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:54.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:55.888\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:09:55.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:10:57.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:10:58.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 2015\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:10:58.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-02-04 11:10:58.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:10:59.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:00.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:00.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document 
https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:01.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:01.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:01.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf...\n", + "[ ] (0/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 
2/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 3/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 4/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 5/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 6/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 7/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 8/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 9/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 10/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 
11/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 12/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 13/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 14/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 15/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 16/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 17/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 18/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 19/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 
20/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 21/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 22/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 23/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 24/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 25/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 26/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 27/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 28/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 
29/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 30/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 31/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 32/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 33/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 34/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 35/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 36/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 37/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 
38/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 39/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 40/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 41/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 42/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 43/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 44/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 45/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 46/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 
47/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 48/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 49/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 50/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 51/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 52/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 53/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 54/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 55/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 
56/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 57/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 58/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 59/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 60/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 61/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 62/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 63/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 64/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 65/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 
66/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 67/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 68/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 69/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 70/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 71/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 72/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 73/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 74/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 75/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 
76/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 77/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 78/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 79/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 80/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 81/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 82/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 83/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 84/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 85/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 
86/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 87/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 88/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 89/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 90/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] ( 91/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] ( 92/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] ( 93/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] ( 94/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] ( 95/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] ( 
96/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] ( 97/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] ( 98/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] ( 99/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (100/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (101/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (102/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (103/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (104/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (105/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] 
(106/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (107/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (108/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (109/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (110/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (111/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (112/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (113/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (114/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (115/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] 
(116/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (117/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (118/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (119/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (120/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (121/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (122/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (123/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (124/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (125/129)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (126/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] 
(127/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (128/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (129/129)\b\b\b\b\b\b\b\b\b\b\b" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 11:11:39.970\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:39.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 11:11:40.067\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 254 sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:40.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:40.101\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:40.102\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:40.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:40.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:41.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:42.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:43.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:44.572\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:45.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:11:45.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:46.715\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:47.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:48.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:49.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:50.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:51.543\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:52.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:53.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:54.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:54.403\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:54.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:12:54.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:13:55.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:13:56.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:13:57.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:13:58.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:13:58.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-02-04 11:13:58.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:13:59.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:14:00.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:14:00.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-02-04 11:14:00.654\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:14:01.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:14:02.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:14:03.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:14:03.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - 
\u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:15:04.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:15:05.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:15:06.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:15:08.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:15:09.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:15:10.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:15:11.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:15:12.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:15:13.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + 
"\u001b[32m2025-02-04 11:15:13.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:16:14.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:16:15.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:16:16.572\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:16:17.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:16:18.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:16:19.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:16:20.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:16:21.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:16:22.712\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:16:22.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:17:23.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:17:24.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:17:26.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:17:27.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:17:28.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:17:29.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:17:30.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:17:31.289\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:17:32.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:17:32.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:18:33.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:18:34.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:18:34.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:18:34.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:18:34.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:18:34.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n" + 
] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing CUDA_C_Programming_Guide.pdf.pdf...\n", + "[ ] (0/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 2/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 3/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 4/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 5/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 6/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 7/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 
8/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 9/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 10/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 11/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 12/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 13/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 14/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 15/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 16/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 
17/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 18/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 19/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 20/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 21/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 22/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 23/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 24/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 25/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 
26/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 27/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 28/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 29/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 30/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 31/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 32/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 33/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 34/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 
35/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 36/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 37/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 38/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 39/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 40/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 41/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 42/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 43/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 
44/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 45/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 46/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 47/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 48/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 49/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 50/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 51/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 52/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 
53/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 54/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 55/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 56/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 57/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 58/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 59/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 60/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 61/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 
62/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 63/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 64/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 65/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 66/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 67/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 68/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 69/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 70/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 
71/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 72/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 73/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 74/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 75/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 76/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 77/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 78/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 79/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 
80/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 81/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 82/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 83/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 84/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 85/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 86/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 87/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 88/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 
89/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 90/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 91/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 92/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 93/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 94/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 95/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 96/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 97/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 
98/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 99/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] (100/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] (101/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (102/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (103/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (104/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (105/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (106/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] 
(107/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (108/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (109/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (110/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (111/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (112/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (113/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (114/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (115/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] 
(116/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (117/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (118/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (119/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (120/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (121/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (122/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (123/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (124/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] 
(125/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (126/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (127/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (128/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (129/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (130/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (131/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (132/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (133/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] 
(134/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (135/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (136/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (137/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (138/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (139/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (140/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (141/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (142/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] 
(143/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (144/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (145/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (146/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (147/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (148/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (149/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (150/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (151/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] 
(152/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (153/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (154/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (155/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (156/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (157/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (158/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (159/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (160/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] 
(161/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (162/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (163/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (164/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (165/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (166/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (167/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (168/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (169/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] 
(170/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (171/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (172/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (173/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (174/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (175/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (176/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (177/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (178/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] 
(179/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (180/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (181/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (182/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (183/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (184/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (185/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (186/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (187/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] 
(188/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (189/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (190/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (191/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (192/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (193/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (194/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (195/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (196/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] 
(197/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (198/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (199/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (200/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (201/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (202/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (203/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (204/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (205/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] 
(206/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (207/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (208/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (209/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (210/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (211/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (212/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (213/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (214/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] 
(215/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (216/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (217/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (218/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (219/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (220/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (221/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (222/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (223/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] 
(224/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (225/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (226/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (227/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (228/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (229/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (230/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (231/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (232/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] 
(233/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (234/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (235/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (236/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (237/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (238/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (239/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (240/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (241/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] 
(242/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (243/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (244/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (245/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (246/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (247/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (248/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (249/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (250/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] 
(251/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (252/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (253/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (254/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (255/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (256/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (257/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (258/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (259/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (260/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] 
(261/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (262/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (263/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (264/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (265/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (266/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (267/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (268/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (269/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (270/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] 
(271/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (272/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (273/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (274/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (275/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (276/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (277/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (278/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (279/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (280/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] 
(281/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (282/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (283/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (284/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (285/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (286/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (287/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (288/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (289/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (290/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] 
(291/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (292/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (293/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (294/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (295/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (296/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (297/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (298/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (299/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (300/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] 
(301/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (302/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (303/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (304/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (305/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (306/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (307/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (308/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (309/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (310/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] 
(311/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (312/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (313/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (314/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (315/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (316/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (317/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (318/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (319/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (320/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] 
(321/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (322/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (323/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (324/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (325/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (326/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (327/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (328/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (329/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (330/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] 
(331/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (332/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (333/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (334/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (335/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (336/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (337/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (338/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (339/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (340/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] 
(341/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (342/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (343/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (344/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (345/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (346/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (347/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (348/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (349/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (350/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] 
(351/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (352/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (353/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (354/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (355/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (356/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (357/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (358/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (359/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (360/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] 
(361/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (362/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (363/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (364/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (365/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (366/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (367/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (368/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (369/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (370/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] 
(371/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (372/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (373/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (374/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (375/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (376/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (377/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (378/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (379/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (380/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] 
(381/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (382/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (383/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (384/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (385/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (386/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (387/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (388/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (389/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (390/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] 
(391/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (392/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (393/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (394/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (395/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (396/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (397/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (398/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (399/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (400/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] 
(401/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (402/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (403/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (404/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (405/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (406/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (407/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (408/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (409/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (410/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] 
(411/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (412/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (413/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (414/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (415/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (416/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (417/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (418/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (419/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (420/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] 
(421/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (422/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (423/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (424/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (425/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (426/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (427/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (428/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (429/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (430/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] 
(431/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (432/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (433/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (434/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (435/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (436/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (437/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (438/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (439/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (440/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] 
(441/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (442/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (443/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (444/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (445/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (446/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (447/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (448/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (449/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (450/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] 
(451/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (452/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (453/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (454/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (455/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (456/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (457/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (458/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (459/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (460/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] 
(461/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (462/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (463/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (464/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (465/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (466/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (467/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (468/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (469/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (470/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] 
(471/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (472/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (473/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (474/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (475/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (476/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (477/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (478/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (479/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (480/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] 
(481/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (482/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (483/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (484/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (485/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (486/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (487/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (488/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (489/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (490/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] 
(491/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (492/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (493/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (494/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (495/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (496/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (497/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (498/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (499/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (500/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] 
(501/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (502/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (503/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (504/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (505/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (506/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (507/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (508/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (509/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (510/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] 
(511/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (512/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (513/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (514/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (515/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (516/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (517/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (518/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (519/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (520/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (521/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== 
] (522/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (523/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (524/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (525/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (526/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (527/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (528/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (529/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (530/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (531/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (532/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] 
(533/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (534/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (535/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (536/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (537/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (538/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (539/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (540/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (541/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (542/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (543/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] 
(544/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (545/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (546/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (547/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (548/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (549/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (550/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (551/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (552/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (553/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (554/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] 
(555/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (556/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (557/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (558/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (559/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (560/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (561/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (562/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (563/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (564/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (565/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] 
(566/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (567/582)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (568/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (569/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (570/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (571/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (572/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (573/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (574/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (575/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (576/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] 
(577/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (578/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (579/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (580/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (581/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (582/582)\b\b\b\b\b\b\b\b\b\b\b]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 11:20:20.044\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 11:20:20.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:20:20.245\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 447 sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:20:20.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + 
"\u001b[32m2025-02-04 11:20:20.318\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 11:20:20.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 11:20:20.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-02-04 11:20:20.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:30.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:31.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:32.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:33.796\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 1024\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:33.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional 
index?\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:33.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:34.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:35.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:35.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:35.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:36.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:22:36.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:38.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:38.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. 
-C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:38.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:39.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:40.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:40.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:40.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:41.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:42.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:43.656\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:44.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:45.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:46.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:46.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. 
-C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:46.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:23:46.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:48.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:49.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:50.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:51.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:52.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:53.656\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:53.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to 
graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:53.663\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:54.864\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:55.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:55.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:55.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:57.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:24:57.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:25:58.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:25:58.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. 
-C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-02-04 11:25:58.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:25:59.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:00.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:00.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:00.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:01.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:02.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:02.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? 
-A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:02.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:03.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:04.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:04.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:04.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:06.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:07.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:07.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document 
https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:07.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:07.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:07.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing 7DUME_EN01_Rules.pdf.pdf...\n", + "[ ] (0/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] (1/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (2/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (3/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (4/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] 
(5/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (6/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (7/8)\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (8/8)\b\b\b\b\b\b\b" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 11:26:16.222\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:16.224\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:16.231\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 25 sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:16.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:16.246\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:16.250\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:16.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:16.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:26:16.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 11:27:17.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:18.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:18.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:18.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:19.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - 
\u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:20.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:20.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:20.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:21.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:22.412\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:23.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:24.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:25.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:27:25.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + 
"\u001b[32m2025-02-04 11:28:26.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:28:27.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:28:28.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:28:29.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:28:30.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:28:31.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:28:32.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:28:33.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:28:34.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:28:34.380\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:29:35.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:29:36.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:29:37.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:29:38.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:29:39.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:29:40.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:29:41.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:29:42.611\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:29:43.581\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:29:43.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:30:44.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:30:45.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:30:46.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:30:47.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:30:48.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:30:49.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:30:50.797\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:30:51.994\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:30:53.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:30:53.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:31:54.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:31:55.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:31:56.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:31:57.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:31:58.752\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:31:59.801\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:31:59.804\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-02-04 11:31:59.807\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:32:00.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:32:02.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:32:02.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-02-04 11:32:02.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:32:03.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:32:03.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:04.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:04.481\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:04.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:05.553\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:06.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:07.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:08.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:09.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:11.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:12.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:13.071\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:33:13.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:14.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:15.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:16.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:17.933\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:17.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:17.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:19.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:20.187\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:20.189\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:20.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:21.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:22.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:23.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:34:23.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:24.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:26.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:27.148\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:28.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:29.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:29.266\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:29.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:30.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:31.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 2\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:31.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:31.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 
11:35:32.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:33.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:35:33.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:34.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:35.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:36.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:37.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:38.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:40.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:41.130\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:42.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:42.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:42.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:43.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:36:43.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:37:44.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:37:45.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:37:46.920\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:37:48.092\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:37:49.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:37:50.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:37:51.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:37:52.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:37:53.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:37:53.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:38:54.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:38:55.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:38:56.989\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:38:57.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:38:59.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:39:00.181\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:39:01.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:39:02.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:39:03.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:39:03.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:40:04.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:40:05.991\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:40:07.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:40:08.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:40:09.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:40:11.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:40:12.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:40:13.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:40:14.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:40:14.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:41:15.946\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:41:17.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:41:18.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:41:19.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:41:20.308\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:41:21.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:41:22.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:41:23.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:41:24.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:41:24.290\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:25.615\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:26.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:26.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:26.872\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:28.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:29.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:30.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:31.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:32.551\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:33.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:34.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:42:34.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:43:36.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:43:37.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:43:39.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:43:40.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:43:42.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:43:43.573\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:43:44.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:43:46.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:43:47.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:43:47.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:44:49.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:44:50.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:44:52.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:44:53.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:44:54.738\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:44:56.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:44:57.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:44:58.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:45:00.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:45:00.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:01.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:02.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:03.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:03.875\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:03.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:05.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:06.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:07.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:09.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:10.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:11.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:46:11.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:47:12.667\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:47:14.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:47:15.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:47:17.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:47:18.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:47:19.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:47:20.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:47:21.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:47:22.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:47:22.117\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:48:23.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:48:24.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:48:25.654\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:48:26.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:48:28.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:48:29.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:48:30.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:48:31.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:48:32.581\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:48:32.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:49:33.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:49:35.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:49:36.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:49:37.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:49:38.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:49:39.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:49:40.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:49:42.358\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:49:43.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:49:43.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:44.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:46.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:47.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:47.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:47.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:48.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:49.946\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:49.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:49.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:51.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:52.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:53.789\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:55.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 7\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:55.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:55.498\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to 
is_eotn_rulebook.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:55.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:50:55.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting is_eotn_rulebook.pdf.pdf\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing is_eotn_rulebook.pdf.pdf...\n", + "[ ] (0/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 1/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 2/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 3/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 4/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 5/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 
6/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 7/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 8/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] ( 9/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (10/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (11/12)\b\b\b\b\b\b\b\b\b\b\b\b\b====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (12/12)\b\b\b\b\b\b\b\b\b" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 11:51:02.798\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", + "\u001b[32m2025-02-04 11:51:02.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:51:02.808\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 40 sections\u001b[0m\n", + "\u001b[32m2025-02-04 11:51:02.811\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 11:51:02.819\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", + "\u001b[32m2025-02-04 11:51:02.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 11:51:02.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-02-04 11:51:02.828\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:51:02.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-02-04 11:52:04.181\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:52:05.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:52:06.424\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:52:07.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:52:08.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:52:09.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:52:10.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:52:11.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:52:13.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:52:13.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:53:14.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:53:15.874\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:53:16.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:53:17.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:53:19.412\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:53:20.660\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:53:21.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:53:22.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:53:23.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:53:23.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:54:25.296\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:54:26.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:54:27.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:54:28.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:54:29.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:54:30.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:54:31.818\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:54:32.786\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:54:33.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:54:33.909\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:55:35.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:55:36.308\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:55:37.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:55:38.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:55:39.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:55:40.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:55:41.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:55:42.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:55:43.847\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:55:43.849\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:45.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:46.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:47.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:47.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:47.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:48.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:50.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:51.065\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:52.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:53.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:54.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:56:54.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:57:55.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:57:57.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:57:58.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:57:59.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:58:00.568\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:58:01.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:58:03.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:58:04.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:58:04.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-02-04 11:58:04.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:58:05.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:58:05.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:06.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:06.914\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:06.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:08.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:09.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:10.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:11.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:12.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:13.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:15.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:16.519\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 25\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:16.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:16.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 11:59:16.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:00:17.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:00:18.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 12:00:18.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-02-04 12:00:18.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:00:20.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 
12:00:21.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 12:00:22.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 12:00:23.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 12:00:25.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 12:00:26.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 12:00:27.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 12:00:27.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:01:29.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:01:30.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:01:31.504\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 12:01:32.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 12:01:34.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 12:01:35.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 12:01:36.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 12:01:37.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 12:01:39.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 12:01:39.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:02:40.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:02:41.568\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:02:42.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 12:02:43.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 12:02:44.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 12:02:46.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 12:02:47.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 12:02:48.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 12:02:49.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 12:02:49.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:03:51.703\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:03:52.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:03:54.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 12:03:55.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 12:03:56.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 12:03:57.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 12:03:59.012\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 12:04:00.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 12:04:01.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 12:04:01.132\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:02.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:03.728\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:05.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:05.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:07.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:07.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:07.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:08.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:09.792\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:11.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:12.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 12:05:12.339\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:06:13.512\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:06:14.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:06:16.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 12:06:17.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 12:06:18.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 12:06:19.216\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 12:06:20.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 12:06:21.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 12:06:22.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 12:06:22.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:07:24.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:07:25.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:07:26.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 12:07:27.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 12:07:28.990\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 12:07:29.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 12:07:31.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 12:07:32.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 12:07:33.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 12:07:33.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:08:34.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:08:36.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:08:37.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 12:08:38.514\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 12:08:39.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 12:08:40.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 12:08:41.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 12:08:42.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 12:08:44.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 12:08:44.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:45.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:46.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:47.284\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:48.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:49.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:50.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:51.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:52.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:52.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:52.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:54.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 12:09:54.446\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:10:56.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 12:10:56.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-02-04 12:10:56.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:10:57.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:10:58.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 12:10:59.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 12:11:01.518\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 12:11:02.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 12:11:04.065\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 12:11:05.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 12:11:06.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 1\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 12:11:06.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-02-04 12:11:06.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 12:11:06.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:08.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:09.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:11.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", + "\u001b[32m2025-02-04 
12:12:12.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:12.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:12.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:13.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:14.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:14.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:14.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:16.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:17.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:17.516\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:17.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:19.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", + "\u001b[32m2025-02-04 12:12:19.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 12:13:20.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", + "\u001b[32m2025-02-04 12:13:21.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", + "\u001b[32m2025-02-04 12:13:22.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", + "\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " 
logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 489 + }, + "id": "3eW9TIKjHEgz", + "outputId": "fea993e6-3100-4545-b1ad-ddc9d0100fd0" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 14,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 21,\n \"min\": 12,\n \"max\": 100,\n \"num_unique_values\": 14,\n \"samples\": [\n 54,\n 56,\n 12\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"https://arxiv.org/pdf/2210.05189\",\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"CARD AND TILE EFFECTS\",\n \"3 Experimental Results\",\n \"23.1. 
What is Lazy Loading?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"Can players conquer and pillage the same island during the expedition phase?\",\n \"How many victory points you get from each conquered island?\",\n \"How many layers are in the toy model (y = x^2)?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"1\",\n \"5\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"NOT FOUND\",\n \"3\",\n \"B\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"EXPEDITION PHASE\",\n \"4 3\",\n \"Caglar Aytekin AI Lead AAC Technologies\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1212https://arxiv.org/pdf/2210.051893 Experimental ResultsHow many layers are in the toy model (y = x^2)?3NOT FOUNDCaglar Aytekin AI Lead AAC Technologies
2828https://arxiv.org/pdf/2201.119033.1 Experimental SetupHow many large language models were evaluated?53Abstract
3232https://arxiv.org/pdf/2201.119035 Symbolic ReasoningWhich symbolic reasoning task is used as an ou...AYES5 Symbolic Reasoning
3333https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32H Appendix: Alternate Annotators for MWP
3434https://arxiv.org/pdf/2201.119033.2 ResultsHow many random samples were examined to under...100NOT FOUNDChain-of-Thought Prompting Elicits Reasoning i...
4545https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSWhich type of cards provide coins? -A: Gray -B...BNOT FOUND4
4848https://github.com/mozilla-ai/structured-qa/re...END OF THE GAMECan the game end in a tie?YESNOOVERVIEW AND GOAL
5050https://github.com/mozilla-ai/structured-qa/re...LOOKOUT PHASEWhat is the maximum number of cards a player m...4NOT FOUND1 3 3 5 7 8
5151https://github.com/mozilla-ai/structured-qa/re...LOOKOUT PHASEIs there a limit to the number of cards a play...NOYESCLEANUP PHASE players with extra Goods if enou...
5454https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASECan players conquer and pillage the same islan...NOYESEXPEDITION PHASE
5555https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEDo you need a fish to conquer a distant island?YESNOT FOUNDINTRO
5656https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEHow many victory points you get from each conq...1NOT FOUND4 3
7878https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...23.1. What is Lazy Loading?Can you enable lazy loading by setting the env...NOYES23.1. What is Lazy Loading?
100100https://assets.publishing.service.gov.uk/media...Procurement in an emerging marketWhich of the following is NOT mentioned as a r...CBPrinciple 2: You use generative AI lawfully, e...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Unnamed: 0 document \\\n", + "12 12 https://arxiv.org/pdf/2210.05189 \n", + "28 28 https://arxiv.org/pdf/2201.11903 \n", + "32 32 https://arxiv.org/pdf/2201.11903 \n", + "33 33 https://arxiv.org/pdf/2201.11903 \n", + "34 34 https://arxiv.org/pdf/2201.11903 \n", + "45 45 https://github.com/mozilla-ai/structured-qa/re... \n", + "48 48 https://github.com/mozilla-ai/structured-qa/re... \n", + "50 50 https://github.com/mozilla-ai/structured-qa/re... \n", + "51 51 https://github.com/mozilla-ai/structured-qa/re... \n", + "54 54 https://github.com/mozilla-ai/structured-qa/re... \n", + "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", + "56 56 https://github.com/mozilla-ai/structured-qa/re... \n", + "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "100 100 https://assets.publishing.service.gov.uk/media... \n", + "\n", + " section \\\n", + "12 3 Experimental Results \n", + "28 3.1 Experimental Setup \n", + "32 5 Symbolic Reasoning \n", + "33 3.4 Robustness of Chain of Thought \n", + "34 3.2 Results \n", + "45 CARD AND TILE EFFECTS \n", + "48 END OF THE GAME \n", + "50 LOOKOUT PHASE \n", + "51 LOOKOUT PHASE \n", + "54 EXPEDITION PHASE \n", + "55 EXPEDITION PHASE \n", + "56 EXPEDITION PHASE \n", + "78 23.1. What is Lazy Loading? \n", + "100 Procurement in an emerging market \n", + "\n", + " question answer pred_answer \\\n", + "12 How many layers are in the toy model (y = x^2)? 3 NOT FOUND \n", + "28 How many large language models were evaluated? 5 3 \n", + "32 Which symbolic reasoning task is used as an ou... A YES \n", + "33 How many annotators provided independent chain... 3 2 \n", + "34 How many random samples were examined to under... 100 NOT FOUND \n", + "45 Which type of cards provide coins? -A: Gray -B... B NOT FOUND \n", + "48 Can the game end in a tie? YES NO \n", + "50 What is the maximum number of cards a player m... 4 NOT FOUND \n", + "51 Is there a limit to the number of cards a play... 
NO YES \n", + "54 Can players conquer and pillage the same islan... NO YES \n", + "55 Do you need a fish to conquer a distant island? YES NOT FOUND \n", + "56 How many victory points you get from each conq... 1 NOT FOUND \n", + "78 Can you enable lazy loading by setting the env... NO YES \n", + "100 Which of the following is NOT mentioned as a r... C B \n", + "\n", + " pred_section \n", + "12 Caglar Aytekin AI Lead AAC Technologies \n", + "28 Abstract \n", + "32 5 Symbolic Reasoning \n", + "33 H Appendix: Alternate Annotators for MWP \n", + "34 Chain-of-Thought Prompting Elicits Reasoning i... \n", + "45 4 \n", + "48 OVERVIEW AND GOAL \n", + "50 1 3 3 5 7 8 \n", + "51 CLEANUP PHASE players with extra Goods if enou... \n", + "54 EXPEDITION PHASE \n", + "55 INTRO \n", + "56 4 3 \n", + "78 23.1. What is Lazy Loading? \n", + "100 Principle 2: You use generative AI lawfully, e... " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " results.loc[index, \"pred_answer\"] = result[\"pred_answer\"].strip()\n", + " if result[\"pred_answer\"].startswith(\n", + " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", + " ):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { "colab": { - "provenance": [] + "base_uri": "https://localhost:8080/" + }, + "id": "AhenESELHEgz", + "outputId": "87682c14-6099-4f9e-bc51-b4d796cab7c0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8640776699029126" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + 
"metadata": { + "id": "-acmSBPMvo1w" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "language_info": { + "name": "python", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index c25fa76..bc9edb8 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -229,11 +229,10 @@ " file = genai.get_file(file.name)\n", "\n", " logger.info(\"Predicting\")\n", - " n = 0\n", " answers = {}\n", " sections = {}\n", " for index, row in document_data.iterrows():\n", - " if n > 0 and n % 10 == 0:\n", + " if model.n > 0 and model.n % 9 == 0:\n", " logger.info(\"Waiting for 60 seconds\")\n", " time.sleep(60)\n", " question = row[\"question\"]\n", @@ -246,7 +245,7 @@ " response_json = json.loads(response.text)\n", " answers[index] = response_json[\"answer\"]\n", " sections[index] = response_json[\"section\"]\n", - " n += 1\n", + " model.n += 1\n", " return answers, sections" ] }, @@ -279,32 +278,20 @@ "outputs": [], "source": [ "SYSTEM_PROMPT = \"\"\"\n", - "You are given an input document and a question.\n", - "You can only answer the question based on the information in the document.\n", - "You will return a JSON name with two keys: \"section\" and \"answer\".\n", - "In `\"section\"`, you will return the name of the section where you found the answer.\n", - "In `\"answer\"`, you will return the answer one of the following JSON:\n", - "- Yes/No (for boolean questions)\n", - "Is the model an LLM?\n", - "{\n", - " \"section\": \"1. Introduction\",\n", - " \"answer\": \"No\"\n", - "}\n", - "- Single number (for numeric questions)\n", - "How many layers does the model have?\n", - "{\n", - " \"section\": \"2. 
Architecture\",\n", - " \"answer\": 12\n", - "}\n", + "You are a rigorous assistant answering questions.\n", + "You must only answer based on the current information available which is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "If the current information available not enough to answer the question,\n", + "you must return \"I need more info\" srting and nothing else:\n", + "\n", + "If the current information is enough to answer, you must return one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "- Number (for numeric questions)\n", "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model?\n", - "-A: ReLU\n", - "-B: Sigmoid\n", - "-C: Tanh\n", - "{\n", - " \"section\": \"2. Architecture\",\n", - " \"answer\": \"C\"\n", - "}\n", "\"\"\"" ] }, @@ -316,13 +303,8 @@ }, "outputs": [], "source": [ - "model = load_gemini_model(\n", - " \"gemini-2.0-flash-exp\",\n", - " system_prompt=SYSTEM_PROMPT,\n", - " generation_config={\n", - " \"response_mime_type\": \"application/json\",\n", - " },\n", - ")" + "model = load_gemini_model(\"gemini-2.0-flash-exp\", system_prompt=SYSTEM_PROMPT)\n", + "model.n = 0" ] }, { diff --git a/benchmark/gemini_perfect_context.ipynb b/benchmark/gemini_perfect_context.ipynb index c11484e..b5877a5 100644 --- a/benchmark/gemini_perfect_context.ipynb +++ b/benchmark/gemini_perfect_context.ipynb @@ -48,8 +48,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Cloning into 'structured-qa'...\n", "remote: Enumerating objects: 893, done.\u001b[K\n", @@ -77,8 +77,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Processing ./structured-qa\n", " Installing build dependencies ... 
\u001b[?25l\u001b[?25hdone\n", @@ -292,25 +292,20 @@ "outputs": [], "source": [ "SYSTEM_PROMPT = \"\"\"\n", - "You are given an input document and a question.\n", - "You can only answer the question based on the information in the document.\n", - "You will return a JSON name with one key: \"answer\".\n", - "In `\"answer\"`, you will return the answer in one of the following JSON contents:\n", - "- Yes/No (for boolean questions)\n", - "Is the model an LLM?\n", - "{\n", - " \"answer\": \"No\"\n", - "}\n", - "- Single number (for numeric questions)\n", - "How many layers does the model have?\n", - "{\n", - " \"answer\": 12\n", - "}\n", + "You are a rigorous assistant answering questions.\n", + "You must only answer based on the current information available which is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "If the current information available not enough to answer the question,\n", + "you must return \"I need more info\" srting and nothing else:\n", + "\n", + "If the current information is enough to answer, you must return one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "- Number (for numeric questions)\n", "- Single letter (for multiple-choice questions)\n", - "What is the activation function used in the model? 
-A: ReLU -B: Sigmoid -C: Tanh\n", - "{\n", - " \"answer\": \"C\"\n", - "}\n", "\"\"\"" ] }, @@ -322,13 +317,7 @@ }, "outputs": [], "source": [ - "model = load_gemini_model(\n", - " \"gemini-2.0-flash-exp\",\n", - " system_prompt=SYSTEM_PROMPT,\n", - " generation_config={\n", - " \"response_mime_type\": \"application/json\",\n", - " },\n", - ")\n", + "model = load_gemini_model(\"gemini-2.0-flash-exp\", system_prompt=SYSTEM_PROMPT)\n", "model.n = 0" ] }, @@ -354,8 +343,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "\u001b[32m2025-02-03 13:58:31.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", "\u001b[32m2025-02-03 13:58:31.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", @@ -885,25 +874,11 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Unnamed: 0 document \\\n", - "33 33 https://arxiv.org/pdf/2201.11903 \n", - "42 42 https://github.com/mozilla-ai/structured-qa/re... \n", - "\n", - " section \\\n", - "33 3.4 Robustness of Chain of Thought \n", - "42 CARD AND TILE COSTS \n", - "\n", - " question answer pred_answer \\\n", - "33 How many annotators provided independent chain... 3 2 \n", - "42 Can a player pay coins to compensate for missi... 
YES NO \n", - "\n", - " pred_section \n", - "33 NaN \n", - "42 NaN " - ], + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 33,\n \"max\": 42,\n \"num_unique_values\": 2,\n \"samples\": [\n 42,\n 33\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"CARD AND TILE COSTS\",\n \"3.4 Robustness of Chain of Thought\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\",\n \"How many annotators provided independent chains of thought?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"YES\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NO\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, "text/html": [ "\n", "
\n", @@ -1168,13 +1143,27 @@ "
\n", " \n" ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"results\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 33,\n \"max\": 42,\n \"num_unique_values\": 2,\n \"samples\": [\n 42,\n 33\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"CARD AND TILE COSTS\",\n \"3.4 Robustness of Chain of Thought\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\",\n \"How many annotators provided independent chains of thought?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"YES\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NO\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } + 
"text/plain": [ + " Unnamed: 0 document \\\n", + "33 33 https://arxiv.org/pdf/2201.11903 \n", + "42 42 https://github.com/mozilla-ai/structured-qa/re... \n", + "\n", + " section \\\n", + "33 3.4 Robustness of Chain of Thought \n", + "42 CARD AND TILE COSTS \n", + "\n", + " question answer pred_answer \\\n", + "33 How many annotators provided independent chain... 3 2 \n", + "42 Can a player pay coins to compensate for missi... YES NO \n", + "\n", + " pred_section \n", + "33 NaN \n", + "42 NaN " + ] }, + "execution_count": 10, "metadata": {}, - "execution_count": 10 + "output_type": "execute_result" } ], "source": [ @@ -1194,14 +1183,14 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "0.9805825242718447" ] }, + "execution_count": 11, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" } ], "source": [ @@ -1235,4 +1224,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From 65553045238c48742b62641fae96ff3eb287de1b Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 4 Feb 2025 16:20:49 +0100 Subject: [PATCH 112/120] Update results with same prompts --- benchmark/gemini_full_context.ipynb | 2390 +++++++++++------------- benchmark/gemini_perfect_context.ipynb | 979 ++++------ 2 files changed, 1411 insertions(+), 1958 deletions(-) diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index bc9edb8..0e11127 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -1,1338 +1,1086 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "oD2lVadPlyhR" - }, - "source": [ - "# Structured Q&A" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JZeb4ABvlyhS" - }, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "77aI1i7vlyhS" - }, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] - }, - { - 
"cell_type": "markdown", - "metadata": { - "id": "atlPXFshlyhS" - }, - "source": [ - "## Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oD2lVadPlyhR" + }, + "source": [ + "# Structured Q&A" + ] }, - "id": "QrgOGtuGlyhT", - "outputId": "d521f860-eab5-41b3-df4c-cbda5c15c5a6" - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-nwtt45ou\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-nwtt45ou\n", - " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", - " Switched to a new branch '5-add-benchmark'\n", - " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit c5ee8e63ab951b740147be2d69c2f00549043734\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.0)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.27.1)\n", - "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.3)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (2.10.6)\n", - "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.0.17)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (6.0.2)\n", - "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (3.12.1)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (1.41.1)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev105+gc5ee8e6) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2.32.3)\n", - "Requirement 
already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev105+gc5ee8e6) (2.27.2)\n", - "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev105+gc5ee8e6) (1.25.2)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (11.1.0)\n", - "Requirement already satisfied: 
protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.10.2)\n", - "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (6.0.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.1.44)\n", - "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.9.1)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (6.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.24.1)\n", - "Requirement already satisfied: 
gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.0.2)\n", - 
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (25.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.17.0)\n" - ] - } - ], - "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "markdown", + "metadata": { + "id": "JZeb4ABvlyhS" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] }, - "id": "S22kTrfPlyhU", - "outputId": "676b4a64-71f6-416b-da99-6b393f324870" - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2025-02-03 14:30:33-- 
https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 21441 (21K) [text/plain]\n", - "Saving to: ‘structured_qa.csv.2’\n", - "\n", - "\rstructured_qa.csv.2 0%[ ] 0 --.-KB/s \rstructured_qa.csv.2 100%[===================>] 20.94K --.-KB/s in 0s \n", - "\n", - "2025-02-03 14:30:33 (100 MB/s) - ‘structured_qa.csv.2’ saved [21441/21441]\n", - "\n" - ] - } - ], - "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qwHWJEsulyhV" - }, - "source": [ - "# Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "iJ812u2llyhV" - }, - "outputs": [], - "source": [ - "import os\n", - "import google.generativeai as genai\n", - "from google.colab.userdata import get, SecretNotFoundError\n", - "\n", - "try:\n", - " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", - "except SecretNotFoundError as e:\n", - " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "V9vfT0kwOnCI" - }, - "outputs": [], - "source": [ - "from loguru import logger" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MKijHC_ClyhX" - }, - "source": [ - "## Function to Process all questions for a single Document" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "oFU-eYMVlyhX" - }, - "outputs": [], - "source": [ - "import json\n", - "import time\n", - "\n", - 
"\n", - "def process_document_questions(\n", - " document_file,\n", - " document_data,\n", - " model,\n", - "):\n", - " logger.info(\"Uploading file\")\n", - " file = genai.upload_file(document_file, mime_type=\"application/pdf\")\n", - " while file.state.name == \"PROCESSING\":\n", - " logger.debug(\"Waiting for file to be processed.\")\n", - " time.sleep(2)\n", - " file = genai.get_file(file.name)\n", - "\n", - " logger.info(\"Predicting\")\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in document_data.iterrows():\n", - " if model.n > 0 and model.n % 9 == 0:\n", - " logger.info(\"Waiting for 60 seconds\")\n", - " time.sleep(60)\n", - " question = row[\"question\"]\n", - " logger.info(f\"Question: {question}\")\n", - " try:\n", - " response = model.model.generate_content([file, question])\n", - " except Exception:\n", - " response_json = json.dumps({\"answer\": \"Error\", \"section\": \"Error\"})\n", - " logger.info(response.text)\n", - " response_json = json.loads(response.text)\n", - " answers[index] = response_json[\"answer\"]\n", - " sections[index] = response_json[\"section\"]\n", - " model.n += 1\n", - " return answers, sections" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VQAof5xtlyhY" - }, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "rrASU0BIOnCI" - }, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_gemini_model" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "whtSJwdrlyhZ" - }, - "outputs": [], - "source": [ - "SYSTEM_PROMPT = \"\"\"\n", - "You are a rigorous assistant answering questions.\n", - "You must only answer based on the current information available which is:\n", - "\n", - "```\n", - "{CURRENT_INFO}\n", - "```\n", - "\n", - "If the current information available not enough to answer the question,\n", - "you must return \"I need more info\" srting and nothing else:\n", 
- "\n", - "If the current information is enough to answer, you must return one of the following formats:\n", - "- YES/NO (for boolean questions)\n", - "- Number (for numeric questions)\n", - "- Single letter (for multiple-choice questions)\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "ObsvwlNslyhZ" - }, - "outputs": [], - "source": [ - "model = load_gemini_model(\"gemini-2.0-flash-exp\", system_prompt=SYSTEM_PROMPT)\n", - "model.n = 0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W97jWzzOlyhZ" - }, - "source": [ - "# Run Benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 + "cell_type": "markdown", + "metadata": { + "id": "77aI1i7vlyhS" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] }, - "id": "AZBwRnfjlyhZ", - "outputId": "f3ace26a-eb56-405a-a436-d802854f29bd" - }, - "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-03 14:30:36.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-02-03 14:30:36.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:30:36.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-02-03 14:30:36.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 14:30:38.199\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:30:38.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-02-03 14:31:38.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.1 Assessing Responsible AI\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:31:38.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", - "\u001b[32m2025-02-03 14:32:37.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.1 Assessing Responsible AI\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:32:37.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. 
-C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-02-03 14:33:43.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"1. Chapter 1: Research and Development\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:33:43.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-02-03 14:34:49.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"Appendix\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:34:49.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:48.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"2.2 Language\",\n", - "\"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:48.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-02-03 14:36:49.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"1.3 Frontier Al Research\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:36:49.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:49.169\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"7.1 Overview of Al Policy in 2023\",\n", - " \"answer\": 25\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:49.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? 
-A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:54.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"7.4 Al Regulation\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:54.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:58.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"6.1 Postsecondary CS and Al Education\",\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:58.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-02-03 14:40:56.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"8.1 Al Postsecondary Education\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:40:56.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-02-03 14:40:56.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:40:56.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 14:40:58.267\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:40:58.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:03.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3 Model Architecture\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:03.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:07.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:07.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:12.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.1 Encoder and Decoder Stacks\",\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:12.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:16.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.2.2 Multi-Head Attention\",\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:16.747\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:20.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.4 Embeddings and Softmax\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:20.375\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:24.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"6.2 Model Variations\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:24.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:33.087\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.2 Hardware and Schedule\",\n", - " \"answer\": \"8\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:33.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:36.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.2 Hardware and Schedule\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:36.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:43.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.3 Optimizer\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:43.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:46.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.3 Optimizer\",\n", - " \"answer\": \"4000\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:46.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:46.851\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:50.881\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.4 Regularization\",\n", - " \"answer\": \"0.1\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:50.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:50.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:50.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:52.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:52.609\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:00.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"4 OUR METHOD\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:00.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:07.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"ABSTRACT\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:07.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:16.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"1 INTRODUCTION\",\n", - " \"answer\": \"175\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:16.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:23.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. 
Method\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:23.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:23.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:23.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:24.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:24.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:47.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"3 Arithmetic Reasoning\",\n", - "\"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:47.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:01.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"3.1 Experimental Setup\",\n", - "\"answer\": 5\n", - 
"}\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:01.461\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:09.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"3 Arithmetic Reasoning\",\n", - "\"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:09.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:17.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5 Symbolic Reasoning\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:17.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:42.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"5 Symbolic Reasoning\",\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:42.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:50.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5 Symbolic Reasoning\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:50.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:57.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"3.4 Robustness of Chain of Thought\",\n", - "\"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:57.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:07.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3.2 Results\",\n", - " \"answer\": \"100\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:07.861\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:07.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:07.888\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:09.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:09.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:12.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"2.4. Recurrent Networks\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:12.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:15.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. Experimental Results\",\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:15.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:19.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. 
Experimental Results\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:19.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:22.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"Table 1. Computation and memory analysis of toy problems.\",\n", - " \"answer\": \"14\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:22.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:25.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. Experimental Results\",\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:25.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:28.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. 
Conclusion\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:28.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2302.13971\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:28.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:28.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:29.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:29.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:37.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"2.1 Pre-training Data\",\n", - "\"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:37.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:48.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"2.1 Pre-training Data\",\n", - "\"answer\": 20\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:48.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:55.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"2.3 Optimizer\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:55.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:18.186\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"2.3 Optimizer\",\n", - "\"answer\": 0.1\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:18.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:26.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"3 Main results\",\n", - "\"answer\": 20\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:26.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:36.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"3 Main results\",\n", - "\"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:36.891\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:47.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. Bias, Toxicity and Misinformation\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:47.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:54.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"5. Bias, Toxicity and Misinformation\",\n", - "\"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:54.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:04.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.2 CrowS-Pairs\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:04.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:04.131\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:04.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:05.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:05.682\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:15.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"Limitations of generative Al and LLMs\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:15.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:42.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"Limitations of generative Al and LLMs\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:42.639\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? -A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", - "\u001b[32m2025-02-03 14:48:28.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"Procurement in an emerging market\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:48:28.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", - "\u001b[32m2025-02-03 14:48:41.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"Accountability and responsibility\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:48:41.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", - "\u001b[32m2025-02-03 14:48:54.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"Accountability and responsibility\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:48:54.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:48:55.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:48:55.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 14:48:57.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:48:57.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-02-03 14:49:39.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"1. Introduction\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:49:39.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-02-03 14:49:50.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"Chapter 5\",\n", - "\"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:49:50.955\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:01.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"2. Benefits of Open Access\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:01.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:26.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. 
Where Do You Want to Make Your Work Available?\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:26.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:38.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"3. Open Access Policies\",\n", - " \"answer\": \"2015\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:38.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:53.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"Chapter 5\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:53.028\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:53.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 
1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:53.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:55.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:50:55.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-02-03 14:51:21.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"2. SANITARY HOT WATER INSTALLATIONS\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:51:21.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-02-03 14:51:34.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"1.2.8. 
GAS DETECTION AND VENTING\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:51:34.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-02-03 14:51:47.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"4. VISUAL COMFORT\",\n", - "\"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:51:47.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-02-03 14:52:01.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"1.1.3. 
OCCUPATIONAL SAFETY\",\n", - "\"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:52:01.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:52:01.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:52:01.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 14:52:03.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:52:03.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-02-03 14:53:05.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5.2. 
Thread Hierarchy\",\n", - " \"answer\": 1024\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:53:05.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-02-03 14:53:55.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"5.2. Thread Hierarchy\",\n", - "\"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:53:55.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-02-03 14:54:48.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"6.1.1.1 Offline Compilation\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:54:48.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. 
-C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-02-03 14:55:38.665\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"6.1.1.1 Offline Compilation\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:55:38.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-02-03 14:56:37.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"6.1.1.2 Just-in-Time Compilation\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:56:37.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. 
-C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-02-03 14:57:25.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"6.1.1.2 Just-in-Time Compilation\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:57:25.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-02-03 14:58:17.570\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"15.3. API Fundamentals\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:58:17.572\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-02-03 14:59:16.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"15 Graph Memory Nodes\",\n", - "\"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:59:16.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? 
-A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-02-03 15:00:16.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"15.3. API Fundamentals\",\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:00:16.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-02-03 15:01:09.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"15.3. API Fundamentals\",\n", - "\"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:01:09.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 15:02:09.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. 
-C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-02-03 15:03:02.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"23.1. What is Lazy Loading?\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:03:02.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:03.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"23.1. What is Lazy Loading?\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:03.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:04.178\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:04.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:05.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:05.720\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:15.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"OVERVIEW AND GOAL\",\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:15.710\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:24.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"END OF THE GAME\",\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:24.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:37.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. CARD AND TILE EFFECTS\",\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:37.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:46.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"Turn overview\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:46.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:55.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"Turn overview\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:04:55.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-02-03 15:05:03.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"A. Take a Chapter card\",\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:05:03.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-02-03 15:05:11.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"3. 
CHAPTER OVERVIEW\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:05:11.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-02-03 15:05:21.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. Card and Tile Costs\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:05:21.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-02-03 15:05:28.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"4. CARD AND TILE COSTS\",\n", - " \"answer\": 2\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:05:28.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-02-03 15:05:37.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. 
CARD AND TILE EFFECTS\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:05:37.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 15:06:37.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-02-03 15:06:45.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"CARD AND TILE EFFECTS\",\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:06:45.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-02-03 15:06:58.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"5. 
CARD AND TILE EFFECTS\",\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:06:58.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:07.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"CONQUERING MIDDLE-EARTH\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:07.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:15.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"END OF THE GAME\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:15.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:23.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"END OF THE GAME\",\n", - " \"answer\": 7\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:23.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 
15:07:23.550\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:23.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:25.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:25.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:31.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"LOOKOUT PHASE\",\n", - " \"answer\": 4\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:31.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:36.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"6. 
GAME FLOW\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:36.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:42.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - "\"section\": \"ACTION PHASE\",\n", - "\"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:42.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:48.797\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME FLOW\",\n", - " \"answer\": 25\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:48.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:54.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"EXPEDITION PHASE\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:07:54.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - 
"\u001b[32m2025-02-03 15:08:01.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"7. EXPEDITION PHASE\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:08:01.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-02-03 15:08:07.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"7. EXPEDITION PHASE\",\n", - " \"answer\": \"1\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:08:07.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-02-03 15:08:12.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"CLEANUP PHASE\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:08:12.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-02-03 15:08:19.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"LOCATION ABILITIES\",\n", - " \"answer\": \"1\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:08:19.019\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-02-03 15:08:24.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"9. BUILD A LOCATION\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:08:24.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 15:09:24.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-02-03 15:09:30.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"ACTIONS\",\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:09:30.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-03 15:09:36.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME END\",\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 15:09:36.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and 
player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-02-03 15:09:42.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m31\u001b[0m - \u001b[1m{\n", - " \"section\": \"GAME END\",\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for document_link, document_data in data.groupby(\"document\"):\n", - " logger.info(f\"Downloading document {document_link}\")\n", - " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " if not Path(downloaded_document).exists():\n", - " urlretrieve(document_link, downloaded_document)\n", - " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", - " else:\n", - " logger.info(f\"File {downloaded_document} already exists\")\n", - "\n", - " answers, sections = process_document_questions(\n", - " downloaded_document, document_data, model\n", - " )\n", - "\n", - " for index in document_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a9eMkW1-lyha" - }, - "source": [ - "# Results" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 175 + "cell_type": "markdown", + "metadata": { + "id": "atlPXFshlyhS" + }, + "source": [ + "## Installing dependencies" + ] }, - "id": "EYYJgWf6lyha", - "outputId": 
"a6fcb444-24c2-4dd0-84f4-cbad3011e567" - }, - "outputs": [ { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29,\n \"min\": 5,\n \"max\": 78,\n \"num_unique_values\": 4,\n \"samples\": [\n 39,\n 78,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"CHAPTER OVERVIEW\",\n \"23.1. What is Lazy Loading?\",\n \"3.5 Positional Encoding\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Can you take a Chapter card and a Landmark tile on your same turn?\",\n \"Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\",\n \"Does the final model use learned positional embeddings?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"NO\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": 
\"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Turn overview\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QrgOGtuGlyhT", + "outputId": "f1657f11-fbbb-4323-b7e7-6d68bcb2e139" }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
55https://arxiv.org/pdf/1706.037623.5 Positional EncodingDoes the final model use learned positional em...NOYES6.2 Model Variations
3939https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWCan you take a Chapter card and a Landmark til...NOYESTurn overview
4444https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSCan you use a symbol more than once per turn?NOYES5. CARD AND TILE EFFECTS
7878https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...23.1. What is Lazy Loading?Can you enable lazy loading by setting the env...NOYES23.1. What is Lazy Loading?
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-uq1w5jgv\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-uq1w5jgv\n", + " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", + " Switched to a new branch '5-add-benchmark'\n", + " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 97049d67d83ec6129569d442bd365c7a5e490578\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.7.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.27.1)\n", + "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.7.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (2.10.6)\n", + "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.0.17)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (6.0.2)\n", + "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (3.12.1)\n", 
+ "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (1.41.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev111+g97049d6) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev111+g97049d6) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev111+g97049d6) (2.27.2)\n", + "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev111+g97049d6) (1.25.2)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 
in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.10.2)\n", + "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (6.0.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.1.44)\n", + "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.9.1)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (6.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.24.1)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.10)\n", + 
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (25.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in 
/usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.17.0)\n" + ] + } ], - "text/plain": [ - " Unnamed: 0 document \\\n", - "5 5 https://arxiv.org/pdf/1706.03762 \n", - "39 39 https://github.com/mozilla-ai/structured-qa/re... \n", - "44 44 https://github.com/mozilla-ai/structured-qa/re... \n", - "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "\n", - " section \\\n", - "5 3.5 Positional Encoding \n", - "39 CHAPTER OVERVIEW \n", - "44 CARD AND TILE EFFECTS \n", - "78 23.1. What is Lazy Loading? \n", - "\n", - " question answer pred_answer \\\n", - "5 Does the final model use learned positional em... NO YES \n", - "39 Can you take a Chapter card and a Landmark til... NO YES \n", - "44 Can you use a symbol more than once per turn? NO YES \n", - "78 Can you enable lazy loading by setting the env... NO YES \n", - "\n", - " pred_section \n", - "5 6.2 Model Variations \n", - "39 Turn overview \n", - "44 5. CARD AND TILE EFFECTS \n", - "78 23.1. What is Lazy Loading? 
" + "source": [ + "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "wfz1XQDLlyha", - "outputId": "351fb6b6-c4b3-44a5-dd99-9e6ee3704326" - }, - "outputs": [ { - "data": { - "text/plain": [ - "0.9611650485436893" + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S22kTrfPlyhU", + "outputId": "5aad43fa-5177-4bb2-e400-6aa364bfa7f2" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2025-02-04 13:59:31-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 23304 (23K) [text/plain]\n", + "Saving to: ‘structured_qa.csv.1’\n", + "\n", + "\rstructured_qa.csv.1 0%[ ] 0 --.-KB/s \rstructured_qa.csv.1 100%[===================>] 22.76K --.-KB/s in 0.002s \n", + "\n", + "2025-02-04 13:59:32 (10.6 MB/s) - ‘structured_qa.csv.1’ saved [23304/23304]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + "import google.generativeai as genai\n", + "from google.colab.userdata import get, SecretNotFoundError\n", + "\n", + "try:\n", + " genai.configure(api_key=get(\"GOOGLE_API_KEY\"))\n", + "except SecretNotFoundError as e:\n", + " raise RuntimeError(\"Please set the GOOGLE_API_KEY secret to your API key\") from e\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "V9vfT0kwOnCI" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process all questions for a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + "import json\n", + "import time\n", + "\n", + "\n", + "def process_document_questions(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + "):\n", + " logger.info(\"Uploading file\")\n", + " file = genai.upload_file(document_file, mime_type=\"application/pdf\")\n", + " while file.state.name == \"PROCESSING\":\n", + " logger.debug(\"Waiting for 
file to be processed.\")\n", + " time.sleep(2)\n", + " file = genai.get_file(file.name)\n", + "\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " if model.n > 0 and model.n % 9 == 0:\n", + " logger.info(\"Waiting for 60 seconds\")\n", + " time.sleep(60)\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " try:\n", + " response = model.model.generate_content([file, question])\n", + " except Exception:\n", + " answers[index] = \"Error\"\n", + " sections[index] = None\n", + " continue\n", + " logger.info(response.text)\n", + " answers[index] = response.text\n", + " sections[index] = None\n", + " model.n += 1\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "rrASU0BIOnCI" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_gemini_model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "whtSJwdrlyhZ" + }, + "outputs": [], + "source": [ + "SYSTEM_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You must only answer based on the current information available which is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "If the current information available not enough to answer the question,\n", + "you must return \"I need more info\" srting and nothing else:\n", + "\n", + "If the current information is enough to answer, you must return one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "- Number (for numeric questions)\n", + "- Single letter (for multiple-choice questions)\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "ObsvwlNslyhZ" + }, + "outputs": [], + "source": [ + "model = 
load_gemini_model(\"gemini-2.0-flash-exp\", system_prompt=SYSTEM_PROMPT)\n", + "model.n = 0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W97jWzzOlyhZ" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "AZBwRnfjlyhZ", + "outputId": "9ad4c6b4-063a-4ae6-b802-9e7df4d265fd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[32m2025-02-04 13:59:35.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-02-04 13:59:35.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 13:59:35.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-02-04 13:59:35.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 13:59:37.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 13:59:37.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. 
-C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-02-04 14:00:37.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:00:37.507\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", + "\u001b[32m2025-02-04 14:01:40.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-02-04 14:01:40.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-02-04 14:02:42.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:02:42.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. 
-C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-02-04 14:03:41.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-02-04 14:03:41.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-02-04 14:04:44.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:04:44.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-02-04 14:05:44.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:05:44.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-02-04 14:06:46.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m25\u001b[0m\n", + "\u001b[32m2025-02-04 14:06:46.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-02-04 14:07:50.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:07:50.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. 
-C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-02-04 14:08:50.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:08:50.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 14:09:50.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-02-04 14:10:50.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-02-04 14:10:50.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-02-04 14:10:51.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:10:51.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 14:10:52.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 14:10:52.681\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-02-04 14:10:56.663\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-02-04 14:10:56.665\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:00.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m6\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:00.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:03.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m6\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:03.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:06.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m8\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:06.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does the final model use learned 
embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:10.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:10.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:13.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:13.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:17.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m8\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:17.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? 
-A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:21.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:11:21.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:21.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:24.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:24.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:28.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m4000\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:28.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:31.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m0.1\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:31.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading 
document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:31.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:31.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:33.204\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:33.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:39.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:39.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:45.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:45.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:51.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m175\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:51.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:57.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:57.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:58.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:58.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:59.292\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 14:12:59.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-02-04 14:13:05.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:13:05.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-02-04 14:13:12.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mFive\u001b[0m\n", + "\u001b[32m2025-02-04 14:13:12.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:12.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:18.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m5\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:18.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for 
language models?\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:26.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:26.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:33.463\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:33.465\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:39.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:39.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:46.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:46.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + 
"\u001b[32m2025-02-04 14:14:53.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m50\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:53.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:53.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:53.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:54.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:54.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:57.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:57.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-02-04 14:14:59.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", + "\u001b[32m2025-02-04 
14:14:59.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-02-04 14:15:02.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:15:02.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:02.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:04.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m14\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:04.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:07.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:07.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:09.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:09.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2302.13971\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:09.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:09.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:11.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:11.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:16.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:16.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:22.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNumber\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:22.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:27.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:27.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:33.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m0.1\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:33.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:38.593\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m20\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:38.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:43.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:16:43.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 14:17:43.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", + "\u001b[32m2025-02-04 14:17:49.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:17:49.267\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", + "\u001b[32m2025-02-04 14:17:54.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:17:54.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias 
test?\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:00.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:00.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:01.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:01.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:02.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:02.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? 
-A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:11.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:11.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:18.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:18.682\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? -A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:26.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:26.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. 
-C: Speed, scalability, and security.\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:34.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:34.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. -C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:42.672\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:42.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:43.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:43.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 
14:18:44.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:44.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:54.386\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:18:54.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 14:19:54.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? 
-A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:04.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:04.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:13.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:13.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:30.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:30.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:39.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m2015\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:39.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:49.656\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:49.662\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:50.562\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:50.564\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:51.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 14:20:51.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? 
-A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:03.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:03.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:15.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:15.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:28.807\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:28.809\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:40.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:40.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:40.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:40.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:42.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 14:21:42.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 14:22:42.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-02-04 14:23:28.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m1024\u001b[0m\n", + "\u001b[32m2025-02-04 14:23:28.661\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-02-04 14:24:13.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mI need more info\u001b[0m\n", + "\u001b[32m2025-02-04 14:24:13.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-02-04 14:24:56.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m-B: It is transformed into assembly and/or binary form.\n", + "\u001b[0m\n", + "\u001b[32m2025-02-04 14:24:56.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. 
-C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-02-04 14:25:39.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:25:39.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-02-04 14:26:43.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m-C\u001b[0m\n", + "\u001b[32m2025-02-04 14:26:43.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-02-04 14:27:49.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:27:49.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. 
-C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-02-04 14:28:38.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-02-04 14:28:38.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-02-04 14:29:23.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:29:23.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-02-04 14:30:13.262\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:30:13.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 14:31:13.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? 
-A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-02-04 14:32:03.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-02-04 14:32:03.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-02-04 14:32:53.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:32:53.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-02-04 14:33:38.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:33:38.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:33:38.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded 
https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:33:38.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 14:33:40.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 14:33:40.266\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-02-04 14:33:48.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", + "\u001b[32m2025-02-04 14:33:48.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-02-04 14:33:56.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", + "\u001b[32m2025-02-04 14:33:56.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-02-04 14:34:04.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m6\u001b[0m\n", + "\u001b[32m2025-02-04 14:34:04.049\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-02-04 14:34:11.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", + "\u001b[32m2025-02-04 14:34:11.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-02-04 14:34:19.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:34:19.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-02-04 14:34:27.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", + "\u001b[32m2025-02-04 14:34:27.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 14:35:27.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-02-04 14:35:34.438\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:35:34.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-02-04 14:35:43.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:35:43.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-02-04 14:35:51.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m2\u001b[0m\n", + "\u001b[32m2025-02-04 14:35:51.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:00.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:00.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:07.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:07.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:16.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:16.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:24.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:24.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:31.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:31.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:38.647\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m7\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:38.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:39.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:39.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:40.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 14:36:40.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 14:37:40.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-02-04 14:37:46.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m4\u001b[0m\n", + "\u001b[32m2025-02-04 14:37:46.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - 
\u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-02-04 14:37:52.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:37:52.215\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-02-04 14:37:57.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:37:57.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:02.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m25\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:02.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:07.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:07.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant 
island?\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:12.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:12.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:18.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m1\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:18.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:23.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:23.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:28.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m1\u001b[0m\n", + "\u001b[32m2025-02-04 14:38:28.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 14:39:28.647\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-02-04 14:39:34.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:39:34.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-02-04 14:39:39.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", + "\u001b[32m2025-02-04 14:39:39.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-04 14:39:44.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", + "\u001b[32m2025-02-04 14:39:44.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-02-04 14:39:49.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document_questions(\n", + " downloaded_document, document_data, model\n", + " )\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 + }, + "id": "EYYJgWf6lyha", + "outputId": "6dffada6-e5ca-4b30-bf8a-dba361486295" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 document \\\n", + "26 26 https://authorsalliance.org/wp-content/uploads... 
\n", + "28 28 https://arxiv.org/pdf/2201.11903 \n", + "34 34 https://arxiv.org/pdf/2201.11903 \n", + "68 68 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "90 90 https://arxiv.org/pdf/2302.13971 \n", + "\n", + " type \\\n", + "26 Techincal Documentation \n", + "28 Scientific Report \n", + "34 Scientific Report \n", + "68 Techincal Documentation \n", + "78 Techincal Documentation \n", + "90 Scientific Report \n", + "\n", + " section \\\n", + "26 CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK... \n", + "28 3.1 Experimental Setup \n", + "34 3.2 Results \n", + "68 5.2. Thread Hierarchy \n", + "78 23.1. What is Lazy Loading? \n", + "90 2.1 Pre-training Data \n", + "\n", + " question answer \\\n", + "26 Are Gold Open Access and Green Open Access mut... NO \n", + "28 How many large language models were evaluated? 5 \n", + "34 How many random samples were examined to under... 100 \n", + "68 Can you identify a thread with a four-dimensio... NO \n", + "78 Can you enable lazy loading by setting the env... NO \n", + "90 How many languages did the Wikipedia data cover? 20 \n", + "\n", + " pred_answer pred_section \n", + "26 YES NaN \n", + "28 FIVE NaN \n", + "34 50 NaN \n", + "68 I NEED MORE INFO NaN \n", + "78 YES NaN \n", + "90 NUMBER NaN " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documenttypesectionquestionanswerpred_answerpred_section
2626https://authorsalliance.org/wp-content/uploads...Techincal DocumentationCHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK...Are Gold Open Access and Green Open Access mut...NOYESNaN
2828https://arxiv.org/pdf/2201.11903Scientific Report3.1 Experimental SetupHow many large language models were evaluated?5FIVENaN
3434https://arxiv.org/pdf/2201.11903Scientific Report3.2 ResultsHow many random samples were examined to under...10050NaN
6868https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation5.2. Thread HierarchyCan you identify a thread with a four-dimensio...NOI NEED MORE INFONaN
7878https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation23.1. What is Lazy Loading?Can you enable lazy loading by setting the env...NOYESNaN
9090https://arxiv.org/pdf/2302.13971Scientific Report2.1 Pre-training DataHow many languages did the Wikipedia data cover?20NUMBERNaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"results\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28,\n \"min\": 26,\n \"max\": 90,\n \"num_unique_values\": 6,\n \"samples\": [\n 26,\n 28,\n 90\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/2302.13971\",\n \"https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Scientific Report\",\n \"Techincal Documentation\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK AVAILABLE?\",\n \"3.1 Experimental Setup\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Are Gold Open Access and Green Open Access mutually exclusive.\",\n \"How many large language models were evaluated?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"5\",\n \"20\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"FIVE\",\n \"NUMBER\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 12 + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " if result[\"pred_answer\"].startswith(\n", + " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", + " ):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wfz1XQDLlyha", + "outputId": "b60f3e7f-06d1-446f-bc4b-ff0f6f1b91ac" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.941747572815534" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "rjMNQp8-sZn9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" } - ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "rjMNQp8-sZn9" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" }, - 
"language_info": { - "name": "python", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/benchmark/gemini_perfect_context.ipynb b/benchmark/gemini_perfect_context.ipynb index b5877a5..e65e98a 100644 --- a/benchmark/gemini_perfect_context.ipynb +++ b/benchmark/gemini_perfect_context.ipynb @@ -38,26 +38,20 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QrgOGtuGlyhT", - "outputId": "fbafc71f-3eca-4148-d567-308268381c6f" + "outputId": "ef47af4b-6bee-4dda-c559-e4e5fee1c54b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Cloning into 'structured-qa'...\n", - "remote: Enumerating objects: 893, done.\u001b[K\n", - "remote: Counting objects: 100% (331/331), done.\u001b[K\n", - "remote: Compressing objects: 100% (192/192), done.\u001b[K\n", - "remote: Total 893 (delta 211), reused 195 (delta 126), pack-reused 562 (from 1)\u001b[K\n", - "Receiving objects: 100% (893/893), 2.42 MiB | 12.01 MiB/s, done.\n", - "Resolving deltas: 100% (493/493), done.\n" + "fatal: destination path 'structured-qa' already exists and is not an empty directory.\n" ] } ], @@ -67,13 +61,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 13, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 1000 }, "id": "S22kTrfPlyhU", - "outputId": "22d372c0-9182-47af-a1cb-11da751f86b5" + "outputId": "ae9617bf-00f7-4b50-d59d-538b285f4eb5" }, "outputs": [ { @@ -84,96 +79,87 @@ " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "Collecting fire (from structured-qa==0.3.3.dev104+g20f9e3f)\n", - " Downloading fire-0.7.0.tar.gz (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev104+g20f9e3f) (0.27.1)\n", - "Collecting loguru (from structured-qa==0.3.3.dev104+g20f9e3f)\n", - " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev104+g20f9e3f) (2.10.6)\n", - "Collecting pymupdf4llm (from structured-qa==0.3.3.dev104+g20f9e3f)\n", - " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev104+g20f9e3f) (6.0.2)\n", - "Collecting rapidfuzz (from structured-qa==0.3.3.dev104+g20f9e3f)\n", - " Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", - "Collecting streamlit (from structured-qa==0.3.3.dev104+g20f9e3f)\n", - " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev104+g20f9e3f) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in 
/usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev104+g20f9e3f) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev104+g20f9e3f) (2.27.2)\n", - "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev104+g20f9e3f)\n", - " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (1.26.4)\n", - "Requirement already satisfied: 
pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (0.10.2)\n", - "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f)\n", - " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (3.1.44)\n", - "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f)\n", - " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (6.4.2)\n", - "Requirement already satisfied: jinja2 in 
/usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (1.24.1)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev104+g20f9e3f) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in 
/usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (5.0.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (25.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev104+g20f9e3f) (1.17.0)\n", - "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", - "Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m29.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m55.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m53.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m37.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", + "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.7.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.27.1)\n", + "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from 
structured-qa==0.3.3.dev111+g97049d6) (0.7.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (2.10.6)\n", + "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.0.17)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (6.0.2)\n", + "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (3.12.1)\n", + "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (1.41.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev111+g97049d6) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in 
/usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev111+g97049d6) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev111+g97049d6) (2.27.2)\n", + "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev111+g97049d6) (1.25.2)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from 
streamlit->structured-qa==0.3.3.dev111+g97049d6) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.10.2)\n", + "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (6.0.0)\n", + "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.1.44)\n", + "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.9.1)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (6.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.23.0)\n", + "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.24.1)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in 
/usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.18.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (25.1.0)\n", + "Requirement 
already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.17.0)\n", + "Building wheels for collected packages: structured-qa\n", " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev104+g20f9e3f-py3-none-any.whl size=13245 sha256=852eab90b4d55da99708db4c4a22243dfaa2d126838195140457d8926c7aeb9e\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev111+g97049d6-py3-none-any.whl size=13247 sha256=a18780844c04a51ee112c6177e9ed610585c15d00f2e5f2dfefa1dcd4d27f151\n", " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", - " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=ab54f7d454f84637ecb160a1e724611ea9da5404833ae692888ff83db75821c3\n", - " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", - "Successfully built structured-qa fire\n", - "Installing collected packages: watchdog, rapidfuzz, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", - "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 rapidfuzz-3.12.1 streamlit-1.41.1 structured-qa-0.3.3.dev104+g20f9e3f watchdog-6.0.0\n" + "Successfully built structured-qa\n", + "Installing collected packages: structured-qa\n", + " Attempting uninstall: structured-qa\n", + " Found existing installation: structured-qa 0.3.3.dev111+g97049d6\n", + " Uninstalling structured-qa-0.3.3.dev111+g97049d6:\n", + " Successfully uninstalled structured-qa-0.3.3.dev111+g97049d6\n", + "Successfully installed structured-qa-0.3.3.dev111+g97049d6\n" ] + }, + { + "data": { + "application/vnd.colab-display-data+json": { + "id": "9e4f0553c0ad49d7a1731d9def243c03", + "pip_warning": { + "packages": [ + "structured_qa" + ] + } + } + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -191,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 14, "metadata": { "id": "iJ812u2llyhV" }, @@ -210,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 15, "metadata": { "id": "jWlaKC5qXZrh" }, @@ -230,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 16, "metadata": { "id": "oFU-eYMVlyhX" }, @@ -255,9 +241,7 @@ " question = row[\"question\"]\n", " logger.info(f\"Question: {question}\")\n", " response = model.model.generate_content([section_file.read_text(), question])\n", - " logger.info(response.text)\n", - " response_json = json.loads(response.text)\n", - " answers[index] = 
response_json[\"answer\"]\n", + " answers[index] = response.text\n", " sections[index] = None\n", " model.n += 1\n", " return answers, sections" @@ -274,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 17, "metadata": { "id": "6RoEbYj3XZri" }, @@ -285,7 +269,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, "metadata": { "id": "whtSJwdrlyhZ" }, @@ -311,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 19, "metadata": { "id": "ObsvwlNslyhZ" }, @@ -332,500 +316,191 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "AZBwRnfjlyhZ", - "outputId": "6e4083db-4a64-49be-986e-d39ff4963e2b" + "outputId": "9d34f3ea-1b9a-40e2-a6ac-9d1d6c00d6c6" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2025-02-03 13:58:31.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:31.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:31.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:33.038\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 175\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:33.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does 
LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:34.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:34.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:34.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:36.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:36.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:36.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. 
-C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:37.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:37.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:38.882\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:38.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:40.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:40.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:41.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:41.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:41.753\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? -A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:43.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:43.022\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:44.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 20\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:44.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:44.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? 
-A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:45.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:45.363\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 13:58:45.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:45.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:47.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:47.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:49.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"0.1\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:49.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:49.170\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:50.671\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:50.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:50.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:52.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:52.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:53.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:53.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:53.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:55.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:55.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:55.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:56.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:56.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:58.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:58.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - 
"\u001b[32m2025-02-03 13:59:59.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 14\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 13:59:59.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-02-03 14:00:00.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:00:00.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:00.853\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:03.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:03.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:03.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:04.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": 20\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:04.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:06.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:06.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:06.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:08.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:08.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:08.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:09.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:09.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:10.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:10.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:10.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:11.974\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:11.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:13.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 5\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:13.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:13.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:15.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": 100\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:15.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:15.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:16.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " 
\"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:16.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:01:16.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:16.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:17.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:17.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:17.726\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:19.051\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 2\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:19.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:19.056\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:20.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:20.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:20.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:21.809\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:21.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:21.816\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:22.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:22.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:22.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:24.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:24.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:26.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:26.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:26.584\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - 
"\u001b[32m2025-02-03 14:02:27.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:27.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:30.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:30.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:31.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:31.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:02:31.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:31.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:33.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:33.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:33.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:34.539\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 8\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:34.540\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:35.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:35.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:35.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:37.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 1024\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:37.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:38.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:38.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:38.425\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:39.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:39.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:40.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 4000\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:40.876\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:40.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:42.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 0.1\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:42.053\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:42.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In the offline compilation 
process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:43.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:43.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:44.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:03:44.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:44.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:45.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:45.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:47.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:47.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:47.215\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:48.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:48.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the 
final round?\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:50.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 25\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:50.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:50.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:51.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:51.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:53.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:53.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:53.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:55.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:55.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:55.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:56.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:56.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If 
a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:57.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 2\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:57.710\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:57.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:59.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 6\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:04:59.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 14:05:59.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:00.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:00.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:03.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:03.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:04.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:04.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:04.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:06.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:06.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:06.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:08.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:08.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:09.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:10.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:12.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:12.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:14.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:14.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:14.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:17.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:17.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:17.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:19.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:19.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:06:19.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:19.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:20.408\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:20.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:21.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 7\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:21.537\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:21.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:23.267\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:23.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:24.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:24.719\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:26.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 1\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:26.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:26.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:27.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:27.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:28.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - "\"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:28.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:28.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:29.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:29.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:30.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:30.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:30.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:32.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:32.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:07:32.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:32.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:33.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:33.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:35.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:35.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:35.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:40.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"1\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:40.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do 
you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:41.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:41.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:41.861\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:43.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 4\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:43.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:44.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:44.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:44.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU 
memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:46.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:46.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:46.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:47.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:47.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:49.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:49.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:49.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are publication fees required for 
all open access journals?\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:50.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:50.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:08:50.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:50.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:52.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:52.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:55.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 3\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:55.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:55.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - 
\u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? -A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:56.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:56.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:56.663\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:57.886\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"No\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:57.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:57.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. 
-C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:59.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:09:59.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:00.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:00.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:00.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:02.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:02.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:04.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"Yes\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:04.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:04.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. 
-C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:06.512\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"A\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:06.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:07.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"C\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:07.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:10:07.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-03 14:11:07.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-02-03 14:11:09.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": 25\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 
14:11:09.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-02-03 14:11:11.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"B\"\n", - "}\u001b[0m\n", - "\u001b[32m2025-02-03 14:11:11.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:11:11.012\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-02-03 14:11:14.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m{\n", - " \"answer\": \"2015\"\n", - "}\u001b[0m\n" + "\u001b[32m2025-02-04 15:05:23.818\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:23.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:23.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - 
\u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:25.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:26.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:26.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:27.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:27.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:29.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:30.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:32.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:33.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:33.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:34.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:36.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:36.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:37.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:05:37.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:37.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:39.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:40.662\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:40.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:41.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:41.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. 
-C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:43.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:44.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:44.563\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:46.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:46.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:47.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:49.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:50.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - 
\u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-02-04 15:06:51.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 15:07:51.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-02-04 15:07:53.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:07:53.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", + "\u001b[32m2025-02-04 15:07:54.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", + "\u001b[32m2025-02-04 15:07:55.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:07:55.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-02-04 15:07:57.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:07:57.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-02-04 15:07:59.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-02-04 15:08:00.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:08:00.736\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-02-04 15:08:02.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-02-04 15:08:04.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:08:04.266\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-02-04 15:08:05.539\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:08:05.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-02-04 15:08:07.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:08:07.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:07.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:08.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:09.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:10.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:10.609\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned 
positional embeddings?\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:11.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:11.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:13.204\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:13.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:14.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:14.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:15.851\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:17.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + 
"\u001b[32m2025-02-04 15:09:17.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:19.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:20.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:22.178\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:09:22.180\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:22.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:24.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:24.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - 
\u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:26.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:27.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:27.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:28.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:30.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:30.215\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:31.461\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:33.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:33.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:34.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:34.920\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:36.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. 
-C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-02-04 15:10:37.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:37.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:39.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. 
-C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:40.539\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:40.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:41.811\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:43.033\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:43.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:44.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. 
-B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. -C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:45.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:45.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:46.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:46.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:48.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:49.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:11:49.550\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + 
"\u001b[32m2025-02-04 15:11:51.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 15:12:51.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-02-04 15:12:52.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-02-04 15:12:54.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-02-04 15:12:55.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:12:55.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-02-04 15:12:56.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:12:56.572\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-02-04 15:12:57.793\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-02-04 15:12:59.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-02-04 15:13:00.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-02-04 15:13:01.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:13:01.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-02-04 15:13:03.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:13:03.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-02-04 15:13:04.384\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:13:04.386\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:04.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:05.789\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:07.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:07.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:08.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:10.462\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:11.684\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:11.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:13.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:15.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:15.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:16.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:17.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:17.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:18.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:14:18.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:18.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:20.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:21.864\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:21.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:23.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:24.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:24.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:26.186\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:27.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:27.461\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:29.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:29.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:30.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:32.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:32.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:33.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:15:33.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:33.695\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:35.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:37.024\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:37.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? -A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:38.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:38.506\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:40.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:40.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. 
-B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:41.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:42.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:42.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:44.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:45.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:45.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. 
-C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:47.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:48.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:16:48.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", + "\u001b[32m2025-02-04 15:17:48.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-02-04 15:17:50.811\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. 
-C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-02-04 15:17:52.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 15:17:52.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n" ] } ], @@ -863,25 +538,25 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 112 + "height": 175 }, "id": "EYYJgWf6lyha", - "outputId": "99160292-3796-4226-94b2-56fae4d048e6" + "outputId": "7e1d48f5-8520-4ee1-a900-092b41cbc1dd" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 33,\n \"max\": 42,\n \"num_unique_values\": 2,\n \"samples\": [\n 42,\n 33\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"CARD AND TILE COSTS\",\n \"3.4 Robustness of Chain of Thought\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n 
\"samples\": [\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\",\n \"How many annotators provided independent chains of thought?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"YES\",\n \"3\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NO\",\n \"2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "summary": "{\n \"name\": \"results\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 39,\n \"min\": 10,\n \"max\": 94,\n \"num_unique_values\": 4,\n \"samples\": [\n 51,\n 94,\n 10\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/2302.13971\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Scientific Paper\",\n \"Board Game\",\n \"Scientific Report\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"LOOKOUT PHASE\",\n \"3 Main results\",\n 
\"5.4 Regularization\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Is there a limit to the number of cards a player may have in their hand?\",\n \"Was the model compared against GPT-4?\",\n \"What was the dropout rate used for the base model?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"NO\",\n \"0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"0. 1\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe" }, "text/html": [ "\n", - "
\n", + "
\n", "
\n", "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documenttypesectionquestionanswerpred_answerpred_section
22https://arxiv.org/pdf/1706.03762Scientific Paper3.1 Encoder and Decoder StacksHow many layers compose the decoder?6N=6NaN
1010https://arxiv.org/pdf/1706.03762Scientific Paper5.4 RegularizationWhat was the dropout rate used for the base mo...0.1PDROP= 0.1NaN
2828https://arxiv.org/pdf/2201.11903Scientific Report3.1 Experimental SetupHow many large language models were evaluated?5FIVENaN
3232https://arxiv.org/pdf/2201.11903Scientific Report5 Symbolic ReasoningWhich symbolic reasoning task is used as an ou...AI NEED MORE INFONaN
3434https://arxiv.org/pdf/2201.11903Scientific Report3.2 ResultsHow many random samples were examined to under...10050NaN
3737https://github.com/mozilla-ai/structured-qa/re...Board GameCARD AND TILE EFFECTSHow many different races are there?6I NEED MORE INFONaN
4141https://github.com/mozilla-ai/structured-qa/re...Board GameCHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
4242https://github.com/mozilla-ai/structured-qa/re...Board GameCARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
4545https://github.com/mozilla-ai/structured-qa/re...Board GameCARD AND TILE EFFECTSWhich type of cards provide coins? -A: Gray -B...BI NEED MORE INFONaN
5555https://github.com/mozilla-ai/structured-qa/re...Board GameEXPEDITION PHASEDo you need a fish to conquer a distant island?YESNONaN
5656https://github.com/mozilla-ai/structured-qa/re...Board GameEXPEDITION PHASEHow many victory points you get from each conq...1I NEED MORE INFONaN
6262https://github.com/mozilla-ai/structured-qa/re...Board GameGAME ENDIf player 1 has 30 Victory points and 4 worker...ACNaN
6868https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation5.2. Thread HierarchyCan you identify a thread with a four-dimensio...NOI NEED MORE INFONaN
9090https://arxiv.org/pdf/2302.13971Scientific Report2.1 Pre-training DataHow many languages did the Wikipedia data cover?208NaN
9494https://arxiv.org/pdf/2302.13971Scientific Report3 Main resultsWas the model compared against GPT-4?NOI NEED MORE INFONaN
9898https://assets.publishing.service.gov.uk/media...RegulationLimitations of generative AI and LLMsWhich of the following is not considered a lim...CI NEED MORE INFONaN
100100https://assets.publishing.service.gov.uk/media...RegulationProcurement in an emerging marketWhich of the following is NOT mentioned as a r...CI NEED MORE INFONaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"results\",\n \"rows\": 17,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29,\n \"min\": 2,\n \"max\": 100,\n \"num_unique_values\": 17,\n \"samples\": [\n 2,\n 10,\n 37\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/2302.13971\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Scientific Report\",\n \"Regulation\",\n \"Board Game\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"GAME END\",\n \"2.1 Pre-training Data\",\n \"3.1 Encoder and Decoder Stacks\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 17,\n \"samples\": [\n \"How many layers compose the decoder?\",\n \"What was the dropout rate used for the base model?\",\n \"How many different races are there?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"NO\",\n \"6\",\n \"20\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"C\",\n \"PDROP= 0.1\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": 
\"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " if result[\"pred_answer\"].startswith(\n", + " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", + " ):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] }, - "id": "mltqL7Bhq3m1", - "outputId": "7779f94c-b541-4463-98d5-17e049981a40" - }, - "outputs": [ { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17,\n \"min\": 10,\n \"max\": 65,\n \"num_unique_values\": 10,\n \"samples\": [\n 55,\n 22,\n 51\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=OJ:L_202401689\",\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/2201.11903\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"Classification of general-purpose AI models as general-purpose AI models with systemic risk\",\n \"CHAPTER OVERVIEW\",\n \"5.4 Regularization\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n 
\"num_unique_values\": 10,\n \"samples\": [\n \"Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\",\n \"What is the threshold, measured in floating point operations, that leads to a presumption that a general-purpose AI model has systemic risk? -A: 10^1 -B: 10^20 -C: 10^25\",\n \"After taking a landmark tile, do you reveal a new tile and the end of your turn?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"C\",\n \"YES\",\n \"0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"B\",\n \"YES\",\n \"PDROP= 0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "c4z9XxXWq3m1", + "outputId": "4d72cd75-425a-404e-e793-a490e6f8c5f5" }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.037625.4 RegularizationWhat was the dropout rate used for the base mo...0.1PDROP= 0.1NaN
2222https://eur-lex.europa.eu/legal-content/EN/TXT...Classification of general-purpose AI models as...What is the threshold, measured in floating po...CBNaN
2828https://eur-lex.europa.eu/legal-content/EN/TXT...Compliant AI systems which present a riskWhat is the time period for a market surveilla...CANaN
4444https://arxiv.org/pdf/2201.119033.2 ResultsHow many random samples were examined to under...10050NaN
4747https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?63NaN
5151https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
5353https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSIf a player is missing 2 skill symbols, how ma...2NO\\n- SINGLE LETTER (FOR MULTIPLE-CHOICE QUEST...NaN
5555https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSWhich type of cards provide coins? -A: Gray -B...BANaN
6565https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEDo you need a fish to conquer a distant island?YESNONaN
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.8349514563106796" + ] + }, + "metadata": {}, + "execution_count": 14 + } ], - "text/plain": [ - " Unnamed: 0 document \\\n", - "10 10 https://arxiv.org/pdf/1706.03762 \n", - "22 22 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", - "28 28 https://eur-lex.europa.eu/legal-content/EN/TXT... \n", - "44 44 https://arxiv.org/pdf/2201.11903 \n", - "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", - "51 51 https://github.com/mozilla-ai/structured-qa/re... \n", - "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", - "53 53 https://github.com/mozilla-ai/structured-qa/re... \n", - "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", - "65 65 https://github.com/mozilla-ai/structured-qa/re... \n", - "\n", - " section \\\n", - "10 5.4 Regularization \n", - "22 Classification of general-purpose AI models as... \n", - "28 Compliant AI systems which present a risk \n", - "44 3.2 Results \n", - "47 CARD AND TILE EFFECTS \n", - "51 CHAPTER OVERVIEW \n", - "52 CARD AND TILE COSTS \n", - "53 CARD AND TILE COSTS \n", - "55 CARD AND TILE EFFECTS \n", - "65 EXPEDITION PHASE \n", - "\n", - " question answer \\\n", - "10 What was the dropout rate used for the base mo... 0.1 \n", - "22 What is the threshold, measured in floating po... C \n", - "28 What is the time period for a market surveilla... C \n", - "44 How many random samples were examined to under... 100 \n", - "47 How many different races are there? 6 \n", - "51 After taking a landmark tile, do you reveal a ... NO \n", - "52 Can a player pay coins to compensate for missi... YES \n", - "53 If a player is missing 2 skill symbols, how ma... 2 \n", - "55 Which type of cards provide coins? -A: Gray -B... B \n", - "65 Do you need a fish to conquer a distant island? 
YES \n", - "\n", - " pred_answer pred_section \n", - "10 PDROP= 0.1 NaN \n", - "22 B NaN \n", - "28 A NaN \n", - "44 50 NaN \n", - "47 3 NaN \n", - "51 YES NaN \n", - "52 NO NaN \n", - "53 NO\\n- SINGLE LETTER (FOR MULTIPLE-CHOICE QUEST... NaN \n", - "55 A NaN \n", - "65 NO NaN " + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "for index, result in results.iterrows():\n", - " if result[\"pred_answer\"].startswith(\n", - " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", - " ):\n", - " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "c4z9XxXWq3m1", - "outputId": "ed00666d-0e36-4e5c-9b62-a2ea14c441cc" - }, - "outputs": [ { - "data": { - "text/plain": [ - "0.898989898989899" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "UXg_TC7R28QI" + }, + "outputs": [], + "source": [] } - ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "UXg_TC7R28QI" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.12" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "00b65fb9908f4e1dbd09b8cc59235605": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "02036bfb1d074b4d89c89bb5faae004a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0369b053c6424fd7bad357433df8dc85": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - 
"_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "05fd61c2436242619810c829734411ac": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_edcb83aee99b4d67a3e5c1e3f73e418f", - "placeholder": "​", - "style": "IPY_MODEL_7f115f13f6bd40ff8273c8f70c888e9d", - "value": " 232k/232k [00:00<00:00, 1.09MB/s]" - } - }, - "07c171affc4b4c548b79dd1153d3150b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, 
- "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0ea58730ba05467dba2f60fea0f1eddc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_07c171affc4b4c548b79dd1153d3150b", - "max": 1633, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ade3b5dbacd1438380933cac3eb81800", - "value": 1633 - } - }, - "10d52f98ecc6417d96cf53cac822c9c6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - 
"border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "16d485c3159b44efaff7e6392a73ed11": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_02036bfb1d074b4d89c89bb5faae004a", - "placeholder": "​", - "style": "IPY_MODEL_2a99d23d260e41b491d6cefdad20e08a", - "value": " 743/743 [00:00<00:00, 53.7kB/s]" - } - }, - "1e00c5a5c32b4708a12ac145a2a08a94": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "20461602ac2647a4a8607f89e895cbd1": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "25ad96bfb5ba48258320596a2b65c9c0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c566d7af0eb14e4db6f53b2426cbe5da", - "IPY_MODEL_c9f1003b9ea747a78bde82411de091ba", - "IPY_MODEL_38e676d196d14126b306cf95e83f9a14" - ], - "layout": "IPY_MODEL_effc2dbaafe8467ca8bc0e151a72566f" - } - }, - "2612c23e742949a0ae3f37c1acbe54a9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_56c24d6c3bb84ebeb8e63ae7685b622c", - "placeholder": "​", - "style": "IPY_MODEL_d605bbfa7de945ff8f2908fc601d786d", - "value": "vocab.txt: 100%" - } - }, - "2913682df125489d8dabe312893c96bc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_fdf81aa2e92d4c3c998798e820d5c3c8", - "IPY_MODEL_3c3166f5f51946939a03ada7d6ea951a", - "IPY_MODEL_ec00a4a5db4c4f6dbc9ffd6064534d6e" - ], - "layout": "IPY_MODEL_a6fd770f03f440c5859d1fe2e49351d4" - } - }, - "2a54cabb44c041d08863c2985f58307d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_4b82be23d7174558821158562380f29b", - "IPY_MODEL_0ea58730ba05467dba2f60fea0f1eddc", - "IPY_MODEL_fe986e4b2af14539ac7c8ebd9693579a" - ], - "layout": "IPY_MODEL_b2d198b0d4a94766a2344f5aa2748e8c" - } - }, - "2a99d23d260e41b491d6cefdad20e08a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "2f8c15ff6cac41fca08cbd7e1e3a1620": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", 
- "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2612c23e742949a0ae3f37c1acbe54a9", - "IPY_MODEL_b7fcea6c5c624801ba709bcdd02bb0de", - "IPY_MODEL_05fd61c2436242619810c829734411ac" - ], - "layout": "IPY_MODEL_9a7fffad5ad0476d95b78b722a56e757" - } - }, - "307c70a3dfbd4c4c97b1f9797c935b50": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "37e061fb7a704c40bd0ee66b3091853b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - 
"38a552b48b91413183504730b900d0bc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "38e676d196d14126b306cf95e83f9a14": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_42f729466b3a401bb6f1f6079b369e41", - "placeholder": "​", - "style": "IPY_MODEL_00b65fb9908f4e1dbd09b8cc59235605", - "value": " 466k/466k [00:00<00:00, 974kB/s]" - } - }, - "3addfa7153ad4674aad4bb8d32268e7e": { - "model_module": 
"@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3aecfd22fa5c48bc8ebca076cd802f4c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_89a7175a759e4722a74060d98128487c", - "IPY_MODEL_f4baaaa3aadc4bbb9e2f0087ce6273b1", - "IPY_MODEL_f2c9cf22f1ec47d18dbbd14fd60bd525" - ], - "layout": "IPY_MODEL_3addfa7153ad4674aad4bb8d32268e7e" - } - }, - "3c3166f5f51946939a03ada7d6ea951a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - 
"model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_307c70a3dfbd4c4c97b1f9797c935b50", - "max": 405, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_9211cb4be41d47d7a6dede4b51621111", - "value": 405 - } - }, - "42d0723e2451483f8786a6522680a7c0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "42f729466b3a401bb6f1f6079b369e41": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - 
"state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4a2c7268f067428a950351f564432f4e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": 
null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4b82be23d7174558821158562380f29b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a9fba03ff33048ce92eef9452837999c", - "placeholder": "​", - "style": "IPY_MODEL_37e061fb7a704c40bd0ee66b3091853b", - "value": "artifact.metadata: 100%" - } - }, - "4be9ea11e9c54626810d6d3ff5d073b8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": 
null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4fae803d8c4f45e9aec71bf7fd1bde01": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "53758cdbe1b0452cafdd55640b1165d6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "56c24d6c3bb84ebeb8e63ae7685b622c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5ade4f272cc346f9aac412049a3d13e2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "66c946c5b1434087a3c838feabff07c5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - 
"overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "687cf1258e2b47ffb1a169307f78cd61": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "68c078f715454f3c953787aeb38fe1be": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - 
"grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7562207c53ac41e48daafbd9518ea580": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "7a2bbc79b1ba4dacad15b290e29e869f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "7f115f13f6bd40ff8273c8f70c888e9d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": 
"1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "81d5f2d59dc2425a87ea419b345ab988": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "86ed0f5dbf0a4cd2855d2e635ee68aaf": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - 
"grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "87b3d8d920544fd2b8d1eae105441ef9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8923512c87c244c19783ff200b4d5454": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - 
"_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_87b3d8d920544fd2b8d1eae105441ef9", - "max": 8098525888, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_8a7b840fb46c434d8ebe6bbe5c69a044", - "value": 8098525888 - } - }, - "89a7175a759e4722a74060d98128487c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_38a552b48b91413183504730b900d0bc", - "placeholder": "​", - "style": "IPY_MODEL_cd93f50f87cb4169b08110f03545f8bd", - "value": "model.safetensors: 100%" - } - }, - "8a7b840fb46c434d8ebe6bbe5c69a044": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "8c4849cdd00540519c18855be4376548": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8ec927e652084cc0bf5b513275f5efc3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "8f553121841441ba8dcd4fc19cadd037": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b99313423b894fb193eb77aa638b0422", - 
"placeholder": "​", - "style": "IPY_MODEL_c8a7f2ff55d34102a19189e23feb3ae4", - "value": " 112/112 [00:00<00:00, 7.14kB/s]" - } - }, - "9211cb4be41d47d7a6dede4b51621111": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "9a7fffad5ad0476d95b78b722a56e757": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a6fd770f03f440c5859d1fe2e49351d4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": 
"LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a9fba03ff33048ce92eef9452837999c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": 
null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "abace962a40b49cd8d21047f0c961620": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c021f82bc25341e7a10112750316a37b", - "IPY_MODEL_e826fb0e17594e3e9c1cb81c7848b636", - "IPY_MODEL_16d485c3159b44efaff7e6392a73ed11" - ], - "layout": "IPY_MODEL_4be9ea11e9c54626810d6d3ff5d073b8" - } - }, - "ade3b5dbacd1438380933cac3eb81800": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "b1a9887c49774487849bd47ae9edc927": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": 
null, - "layout": "IPY_MODEL_42d0723e2451483f8786a6522680a7c0", - "placeholder": "​", - "style": "IPY_MODEL_53758cdbe1b0452cafdd55640b1165d6", - "value": "special_tokens_map.json: 100%" - } - }, - "b2d198b0d4a94766a2344f5aa2748e8c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b5d106d794bd4aaa9cbfed7c8f98b1ab": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": 
null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b7fcea6c5c624801ba709bcdd02bb0de": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_10d52f98ecc6417d96cf53cac822c9c6", - "max": 231508, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_7562207c53ac41e48daafbd9518ea580", - "value": 231508 - } - }, - "b99313423b894fb193eb77aa638b0422": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": 
null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c021f82bc25341e7a10112750316a37b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_86ed0f5dbf0a4cd2855d2e635ee68aaf", - "placeholder": "​", - "style": "IPY_MODEL_ffcd545025454565866309ab2028a986", - "value": "config.json: 100%" - } - }, - "c566d7af0eb14e4db6f53b2426cbe5da": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8c4849cdd00540519c18855be4376548", - "placeholder": "​", - "style": "IPY_MODEL_c5bd24a20d4645c0937c2a4811da73fc", - "value": "tokenizer.json: 100%" - } - }, - 
"c5bd24a20d4645c0937c2a4811da73fc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c8a7f2ff55d34102a19189e23feb3ae4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c9f1003b9ea747a78bde82411de091ba": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ced0e4662f0e42fa8b82775f50efd15b", - "max": 466081, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_8ec927e652084cc0bf5b513275f5efc3", - "value": 466081 - } - }, - "cd93f50f87cb4169b08110f03545f8bd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cd9a8d967f97499fb4bc77f945234dfe": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_66c946c5b1434087a3c838feabff07c5", - "placeholder": "​", - "style": "IPY_MODEL_e47b2666d47347639b264347c2f87ba2", - "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" - } - }, - "cdac7b7c8a464aa6bea2d3bdd1106479": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": 
null, - "top": null, - "visibility": null, - "width": null - } - }, - "ced0e4662f0e42fa8b82775f50efd15b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d13d60c460fd46468bd1aacb42a50939": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_cd9a8d967f97499fb4bc77f945234dfe", - "IPY_MODEL_8923512c87c244c19783ff200b4d5454", - "IPY_MODEL_f7317270a3c941bfa7c147aa19ae6111" - ], - "layout": "IPY_MODEL_68c078f715454f3c953787aeb38fe1be" - } - }, - 
"d605bbfa7de945ff8f2908fc601d786d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d8f3f0b7a0384097a217fd77d319a966": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e47b2666d47347639b264347c2f87ba2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e5bdc52a220c42199cf97208f8fcc367": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "e826fb0e17594e3e9c1cb81c7848b636": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d8f3f0b7a0384097a217fd77d319a966", - "max": 743, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_f674f0b58eca4813bebfb092b7732d31", - "value": 743 - } - }, - "e99e3a300b2742c39484c9d403f6c4fb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ec00a4a5db4c4f6dbc9ffd6064534d6e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - 
"model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4a2c7268f067428a950351f564432f4e", - "placeholder": "​", - "style": "IPY_MODEL_4fae803d8c4f45e9aec71bf7fd1bde01", - "value": " 405/405 [00:00<00:00, 24.6kB/s]" - } - }, - "ec77e97cd7dc4a6f92f4fb7be8e807f7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "edcb83aee99b4d67a3e5c1e3f73e418f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": 
"@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ee1a98f082e84f57913a8c77d1b46cbb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b1a9887c49774487849bd47ae9edc927", - "IPY_MODEL_f77e53f86c10449d800035b20394067b", - "IPY_MODEL_8f553121841441ba8dcd4fc19cadd037" - ], - "layout": "IPY_MODEL_ff78ef1f30db4046a58f313e4488b55c" - } - }, - "effc2dbaafe8467ca8bc0e151a72566f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - 
"_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f2c9cf22f1ec47d18dbbd14fd60bd525": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_687cf1258e2b47ffb1a169307f78cd61", - "placeholder": "​", - "style": "IPY_MODEL_20461602ac2647a4a8607f89e895cbd1", - "value": " 438M/438M [00:04<00:00, 176MB/s]" - } - }, - "f4baaaa3aadc4bbb9e2f0087ce6273b1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cdac7b7c8a464aa6bea2d3bdd1106479", - "max": 438349816, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_1e00c5a5c32b4708a12ac145a2a08a94", - "value": 438349816 - } - }, - "f674f0b58eca4813bebfb092b7732d31": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "f7317270a3c941bfa7c147aa19ae6111": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ec77e97cd7dc4a6f92f4fb7be8e807f7", - "placeholder": "​", - "style": "IPY_MODEL_e99e3a300b2742c39484c9d403f6c4fb", - "value": " 8.10G/8.10G [03:14<00:00, 42.5MB/s]" - } - }, - "f77e53f86c10449d800035b20394067b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": 
"1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b5d106d794bd4aaa9cbfed7c8f98b1ab", - "max": 112, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e5bdc52a220c42199cf97208f8fcc367", - "value": 112 - } - }, - "fdf81aa2e92d4c3c998798e820d5c3c8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_81d5f2d59dc2425a87ea419b345ab988", - "placeholder": "​", - "style": "IPY_MODEL_5ade4f272cc346f9aac412049a3d13e2", - "value": "tokenizer_config.json: 100%" - } - }, - "fe986e4b2af14539ac7c8ebd9693579a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0369b053c6424fd7bad357433df8dc85", - "placeholder": "​", - "style": "IPY_MODEL_7a2bbc79b1ba4dacad15b290e29e869f", - "value": " 1.63k/1.63k [00:00<00:00, 130kB/s]" - } - }, - "ff78ef1f30db4046a58f313e4488b55c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ffcd545025454565866309ab2028a986": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "cbf9b845e0a647dbab49858de388e2fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e8b5a97e883c42efa65916425b0fabcb", + "IPY_MODEL_b2103d8dd19e4db0a76a69347cdcf821", + "IPY_MODEL_854a17b429c84b83988c6a3a8ce6a692" + ], + "layout": "IPY_MODEL_5d2b6079003749c4b12d18e11b78dfea" + } + }, + "e8b5a97e883c42efa65916425b0fabcb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bc80799bc3b14f7497b9a004ccd6c523", + "placeholder": "​", + "style": "IPY_MODEL_8f502b7de4b0425b93648dfda170cd3f", + "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" + } + }, + "b2103d8dd19e4db0a76a69347cdcf821": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a980dfba58524f63aef61f7c3841b552", + "max": 8098525888, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_29c42506cc2b4ca0950cec2257f73396", + "value": 8098525888 + } + }, + "854a17b429c84b83988c6a3a8ce6a692": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_10e18945e3154720b12f77d4e9d043e3", + "placeholder": "​", + "style": "IPY_MODEL_89b9015832af4c05beed6dbb08b7fd40", + "value": " 8.10G/8.10G [03:12<00:00, 41.4MB/s]" + } + }, + "5d2b6079003749c4b12d18e11b78dfea": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bc80799bc3b14f7497b9a004ccd6c523": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8f502b7de4b0425b93648dfda170cd3f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a980dfba58524f63aef61f7c3841b552": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + 
"flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "29c42506cc2b4ca0950cec2257f73396": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "10e18945e3154720b12f77d4e9d043e3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": 
null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "89b9015832af4c05beed6dbb08b7fd40": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "74ca4b0905af4fc2b02d01391ceb95b8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_93a525e734ce44dc9636fbc0bb34cb7f", + "IPY_MODEL_3353f87c26a547ad946d8fefe00bc884", + "IPY_MODEL_043e7fa0088845eb8f40b9837edfa1ac" + ], + "layout": "IPY_MODEL_71a9a5f4f6564d99b5b31951a01bde8a" + } + }, + "93a525e734ce44dc9636fbc0bb34cb7f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7a3ea23c3b9e4c18937a1d99cc6d8271", + "placeholder": "​", + "style": "IPY_MODEL_9755ef6931444acdbe697eebc76af403", + "value": "artifact.metadata: 100%" + } + }, + "3353f87c26a547ad946d8fefe00bc884": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_100502f9bcae4be598afcd3a9bb8a79b", + "max": 1633, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9889c1deb2e744148cd7b3eba4d6e91e", + "value": 1633 + } + }, + "043e7fa0088845eb8f40b9837edfa1ac": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_69126f58ca2b4e2b99bc82de73e4594e", + "placeholder": "​", + "style": "IPY_MODEL_6aa77b4aaf4c46999863450e84fc0b49", + "value": " 1.63k/1.63k [00:00<00:00, 120kB/s]" + } + }, + "71a9a5f4f6564d99b5b31951a01bde8a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7a3ea23c3b9e4c18937a1d99cc6d8271": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9755ef6931444acdbe697eebc76af403": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "100502f9bcae4be598afcd3a9bb8a79b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9889c1deb2e744148cd7b3eba4d6e91e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + 
"model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "69126f58ca2b4e2b99bc82de73e4594e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6aa77b4aaf4c46999863450e84fc0b49": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "11c9bc681a5c41079c1e3967d8201f5a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1b88f649686247e8bf3e1caff913ffc6", + "IPY_MODEL_1aacf3bfc433465eb954f8226a1d932d", + "IPY_MODEL_d4701f480c104ceb95e686f46a80a6dc" + ], + "layout": "IPY_MODEL_ef1f4349d94b476786ecd402877bb07a" + } + }, + "1b88f649686247e8bf3e1caff913ffc6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9fba1fe286ae44b99d1b810347400094", + "placeholder": "​", + "style": "IPY_MODEL_e3ff55f44f474996a49aabdf4cfb3f85", + "value": "config.json: 100%" + } + }, + "1aacf3bfc433465eb954f8226a1d932d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_31bd4b8908b54e689c89c6ee013deb77", + "max": 743, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_624257553afd440b8faf28595e0550aa", + "value": 743 + } + }, + "d4701f480c104ceb95e686f46a80a6dc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_32286dd08ec346bb9222af407f5878d5", + "placeholder": "​", + "style": "IPY_MODEL_5fbf897c0fe640829c178b6a03b0f143", + "value": " 743/743 [00:00<00:00, 53.7kB/s]" + } + }, + "ef1f4349d94b476786ecd402877bb07a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9fba1fe286ae44b99d1b810347400094": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e3ff55f44f474996a49aabdf4cfb3f85": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "31bd4b8908b54e689c89c6ee013deb77": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "624257553afd440b8faf28595e0550aa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "32286dd08ec346bb9222af407f5878d5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5fbf897c0fe640829c178b6a03b0f143": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "eb2c0289f5e74b498e97adef2965e209": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0680cf1052524a2280057b4dfb741f76", + "IPY_MODEL_315406a845034ef4a3976f902d8f46ec", + "IPY_MODEL_b6c70c0eb7cf440dac46629a2053e15b" + ], + "layout": "IPY_MODEL_03962ee8656d41909b530ab86ce11c40" + } + }, + "0680cf1052524a2280057b4dfb741f76": 
{ + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_62f76e44429a4dbe9dde7c1e7aca14c2", + "placeholder": "​", + "style": "IPY_MODEL_2838c4a5619c4a96a86c8e4b3cdf2801", + "value": "model.safetensors: 100%" + } + }, + "315406a845034ef4a3976f902d8f46ec": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_688c493e0a5441faa3654456b9d4d630", + "max": 438349816, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a9bfa3f6f8ef4896a1bea01deb4d9230", + "value": 438349816 + } + }, + "b6c70c0eb7cf440dac46629a2053e15b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_eb4f05293da049f385e80bfe42f66003", + "placeholder": "​", + "style": "IPY_MODEL_15f5273a27a643ce95a5cc824e7ecd3c", + 
"value": " 438M/438M [00:05<00:00, 126MB/s]" + } + }, + "03962ee8656d41909b530ab86ce11c40": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "62f76e44429a4dbe9dde7c1e7aca14c2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2838c4a5619c4a96a86c8e4b3cdf2801": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "688c493e0a5441faa3654456b9d4d630": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + 
"min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a9bfa3f6f8ef4896a1bea01deb4d9230": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "eb4f05293da049f385e80bfe42f66003": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"15f5273a27a643ce95a5cc824e7ecd3c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7e09e0ebcbcc4e0ea33a3aad61e926c8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9f187ced53264c5ead2b55626181935a", + "IPY_MODEL_348b1fab14634220902d093ab34d5dd2", + "IPY_MODEL_399182af49e84396b0a775f24392a9d7" + ], + "layout": "IPY_MODEL_4d473a63d8344d28bf67d0cbd3d4f0c2" + } + }, + "9f187ced53264c5ead2b55626181935a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b00c6917fdb4c0eb6998a87e578b19f", + "placeholder": "​", + "style": "IPY_MODEL_23863ecb9a7a441d832aba8a0e22cf4d", + "value": "tokenizer_config.json: 100%" + } + }, + "348b1fab14634220902d093ab34d5dd2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + 
"_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_919382b35272473f97a5616319bd874b", + "max": 405, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_dabcfb49df024d5c975d65ba9d8eed2f", + "value": 405 + } + }, + "399182af49e84396b0a775f24392a9d7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aa01a6bbe6b04c319ced1476b6bbcc36", + "placeholder": "​", + "style": "IPY_MODEL_9796ad3c0e6848459fb31c905583eb3e", + "value": " 405/405 [00:00<00:00, 34.4kB/s]" + } + }, + "4d473a63d8344d28bf67d0cbd3d4f0c2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1b00c6917fdb4c0eb6998a87e578b19f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "23863ecb9a7a441d832aba8a0e22cf4d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "919382b35272473f97a5616319bd874b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dabcfb49df024d5c975d65ba9d8eed2f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "aa01a6bbe6b04c319ced1476b6bbcc36": { + "model_module": "@jupyter-widgets/base", 
+ "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9796ad3c0e6848459fb31c905583eb3e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6d15d0fa977744049b20963732e356e0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_41341001649c4fb995b7d528a9e7b06c", + "IPY_MODEL_f5d46e7afd654e0987026dbab0238f98", + "IPY_MODEL_a518e66226fd43d6a5408e29ec5f7023" + ], + "layout": "IPY_MODEL_25252664df70406291be543379deafcd" + } + }, + "41341001649c4fb995b7d528a9e7b06c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c528f4f6d0f940bf8d3ba985eac2d4f9", + "placeholder": "​", + "style": "IPY_MODEL_15e4b108dc504f09b55784b3da3118d4", + "value": "vocab.txt: 100%" + } + }, + "f5d46e7afd654e0987026dbab0238f98": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_926a91b942e54e528ea421c71f34b6f8", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3e8dd89a4da649a99f25258bb88e2223", + "value": 231508 + } + }, + "a518e66226fd43d6a5408e29ec5f7023": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7837697ecc6b449a82b9977824ed52c6", + "placeholder": "​", + "style": "IPY_MODEL_fd0607d1969647869b154e5b02de1629", + "value": " 232k/232k [00:00<00:00, 7.09MB/s]" + } + }, + "25252664df70406291be543379deafcd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c528f4f6d0f940bf8d3ba985eac2d4f9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "15e4b108dc504f09b55784b3da3118d4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "926a91b942e54e528ea421c71f34b6f8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3e8dd89a4da649a99f25258bb88e2223": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7837697ecc6b449a82b9977824ed52c6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": 
null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd0607d1969647869b154e5b02de1629": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8e1e1f696569401ca14517e9249b5f11": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_95e41f95919e41eebd177975c125c23e", + "IPY_MODEL_8fc933d79fd04da18c6f30499337ad70", + "IPY_MODEL_a2fcb36a1fbf4704834f16defae56c4f" + ], + "layout": "IPY_MODEL_c1a2d65b6b744929bcc7ca3754447700" + } + }, + "95e41f95919e41eebd177975c125c23e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_42b95efd640e4639be9f81a5a5701014", + "placeholder": "​", + "style": "IPY_MODEL_efc86504034045d28c80d972a242ae9e", + "value": "tokenizer.json: 100%" + } + }, + "8fc933d79fd04da18c6f30499337ad70": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c994599fe83548d6aca0da1777dbe346", + "max": 466081, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_eb665998412f479cb04e08ec778f6c83", + "value": 466081 + } + }, + "a2fcb36a1fbf4704834f16defae56c4f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6492cf6e34cf4a26a450d6f2f0fb6157", + "placeholder": "​", + "style": "IPY_MODEL_83dd61473fc342c1954f2e1adeac05c2", + "value": " 466k/466k [00:00<00:00, 6.76MB/s]" + } + }, + "c1a2d65b6b744929bcc7ca3754447700": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "42b95efd640e4639be9f81a5a5701014": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "efc86504034045d28c80d972a242ae9e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c994599fe83548d6aca0da1777dbe346": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eb665998412f479cb04e08ec778f6c83": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6492cf6e34cf4a26a450d6f2f0fb6157": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "83dd61473fc342c1954f2e1adeac05c2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + 
}, + "a015e60089ea4921bd7b7719e13ba6a5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b97ace123c2746e48daa2c4d99896d5d", + "IPY_MODEL_66cdd770e9ee4865977dea9b93ba3090", + "IPY_MODEL_d7a03ee80899428bad72944699468f6a" + ], + "layout": "IPY_MODEL_97f5515670f443528fa833c3827588af" + } + }, + "b97ace123c2746e48daa2c4d99896d5d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_65c0d0a4e2424d639a5e712c89fdf786", + "placeholder": "​", + "style": "IPY_MODEL_17345d6096e945e58ddd63e508c2d66a", + "value": "special_tokens_map.json: 100%" + } + }, + "66cdd770e9ee4865977dea9b93ba3090": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9823e32799c84de7a4ecf4a6eaf64dbc", + "max": 112, + "min": 0, + "orientation": 
"horizontal", + "style": "IPY_MODEL_139635d097c848c18449ef5994b08385", + "value": 112 + } + }, + "d7a03ee80899428bad72944699468f6a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_53706ea298284f24886deb2655df3729", + "placeholder": "​", + "style": "IPY_MODEL_d2a201e6a2664cc0a6194c2a336735fb", + "value": " 112/112 [00:00<00:00, 9.26kB/s]" + } + }, + "97f5515670f443528fa833c3827588af": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + 
"width": null + } + }, + "65c0d0a4e2424d639a5e712c89fdf786": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "17345d6096e945e58ddd63e508c2d66a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9823e32799c84de7a4ecf4a6eaf64dbc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "139635d097c848c18449ef5994b08385": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "53706ea298284f24886deb2655df3729": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d2a201e6a2664cc0a6194c2a336735fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } } - } - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/benchmark/qwen_2_5_7B_full_context.ipynb b/benchmark/qwen_2_5_7B_full_context.ipynb index 27537cc..81dbb27 100644 --- a/benchmark/qwen_2_5_7B_full_context.ipynb +++ b/benchmark/qwen_2_5_7B_full_context.ipynb @@ -40,9 +40,23 @@ "cell_type": "code", "execution_count": 1, "metadata": { - "id": "tk9uiWGn81j_" + "id": "tk9uiWGn81j_", + "outputId": "eb76f95c-48e6-4ead-a16f-03ce4fc53dac", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.9 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ], "source": [ "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" ] @@ -55,14 +69,19 @@ "base_uri": "https://localhost:8080/" }, "id": "uBJnKqs_MqBV", - "outputId": "b2de9597-0143-4547-be78-fa56999806f3" + "outputId": "095d2c5d-b860-4af9-c1dc-9dc5d6ac8755" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Requirement already satisfied: PyPDF2 in /usr/local/lib/python3.11/dist-packages (3.0.1)\n" + "Collecting PyPDF2\n", + " Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)\n", + "Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/232.6 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━\u001b[0m \u001b[32m225.3/232.6 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m232.6/232.6 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: PyPDF2\n", + "Successfully installed PyPDF2-3.0.1\n" ] } ], @@ -78,78 +97,112 @@ "base_uri": "https://localhost:8080/" }, "id": "2HoyF-xbHEgv", - "outputId": "a509f236-08be-4371-d40d-aed1f49beed3" + "outputId": "e5f9a061-5117-447d-8e1b-1c5e6dc1b875" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git 
(to revision 5-add-benchmark) to /tmp/pip-req-build-g4ugf7tj\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-g4ugf7tj\n", + " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-lus18o4a\n", + " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-lus18o4a\n", " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", " Switched to a new branch '5-add-benchmark'\n", " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit c5ee8e63ab951b740147be2d69c2f00549043734\n", + " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 0ab4688e84181b78317b8433eca7e3aaf70c8a1b\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.0)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.27.1)\n", - "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.3)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (2.10.6)\n", - "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (0.0.17)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (6.0.2)\n", - "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (3.12.1)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev105+gc5ee8e6) (1.41.1)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev105+gc5ee8e6) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2.32.3)\n", - "Requirement 
already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev105+gc5ee8e6) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev105+gc5ee8e6) (2.27.2)\n", - "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev105+gc5ee8e6) (1.25.2)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (11.1.0)\n", - "Requirement already satisfied: 
protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.10.2)\n", - "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (6.0.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.1.44)\n", - "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.9.1)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (6.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.24.1)\n", - "Requirement already satisfied: 
gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (5.0.2)\n", - 
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (25.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev105+gc5ee8e6) (1.17.0)\n" + "Collecting fire (from structured-qa==0.3.3.dev113+g0ab4688)\n", + " Downloading fire-0.7.0.tar.gz (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev113+g0ab4688) (0.27.1)\n", + "Collecting loguru (from structured-qa==0.3.3.dev113+g0ab4688)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev113+g0ab4688) (2.10.6)\n", + "Collecting pymupdf4llm (from structured-qa==0.3.3.dev113+g0ab4688)\n", + " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev113+g0ab4688) (6.0.2)\n", + "Collecting rapidfuzz (from structured-qa==0.3.3.dev113+g0ab4688)\n", + " Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", + "Collecting streamlit (from structured-qa==0.3.3.dev113+g0ab4688)\n", + " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev113+g0ab4688) (2.5.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev113+g0ab4688) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev113+g0ab4688) (2.27.2)\n", + "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev113+g0ab4688)\n", + " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (5.5.0)\n", + "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.9.0)\n", + "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (5.5.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (8.1.8)\n", + "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.26.4)\n", + "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2.2.2)\n", + "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (11.1.0)\n", + "Requirement already satisfied: protobuf<6,>=3.20 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (4.25.6)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (17.0.0)\n", + "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (13.9.4)\n", + "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (9.0.0)\n", + "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.10.2)\n", + "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev113+g0ab4688)\n", + " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.1.44)\n", + "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev113+g0ab4688)\n", + " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (6.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.1.5)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (4.23.0)\n", + "Requirement already satisfied: 
narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.24.1)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (4.0.12)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2025.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2024.12.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2.18.0)\n", + "Requirement already 
satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (25.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.22.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.17.0)\n", + "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", + "Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m56.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m96.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m102.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m79.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", + " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev113+g0ab4688-py3-none-any.whl size=13247 sha256=cfb9eb8bc7d4151e7d2e0e833826ea14066950d513d283142b21f54d7c8a29e4\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-zgr6pzyi/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", + " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=a21cd75c3bacefec755e81b0472785127c6443c8591f163b12efa5653e33b60e\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + "Successfully built structured-qa fire\n", + "Installing collected packages: watchdog, rapidfuzz, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", + "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 rapidfuzz-3.12.1 streamlit-1.41.1 structured-qa-0.3.3.dev113+g0ab4688 watchdog-6.0.0\n" ] } ], @@ -174,23 +227,23 @@ "base_uri": "https://localhost:8080/" }, "id": "p_hsSGafHEgw", - "outputId": "7220b133-024e-480c-aed7-0879efab0317" + "outputId": "69a9eff5-626a-44ff-9403-6b7db111e765" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "--2025-02-03 14:27:25-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2025-02-04 18:22:19-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 21441 (21K) [text/plain]\n", - "Saving to: ‘structured_qa.csv.2’\n", + "Length: 23304 (23K) [text/plain]\n", + "Saving to: ‘structured_qa.csv’\n", "\n", - "structured_qa.csv.2 100%[===================>] 20.94K --.-KB/s in 0.001s \n", + "structured_qa.csv 100%[===================>] 22.76K --.-KB/s in 0.001s \n", "\n", - "2025-02-03 14:27:25 (14.8 MB/s) - ‘structured_qa.csv.2’ saved [21441/21441]\n", + "2025-02-04 18:22:20 (30.1 MB/s) - ‘structured_qa.csv’ saved [23304/23304]\n", "\n" ] } @@ -263,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "metadata": { "id": "n6d8F7cYHEgy" }, @@ -310,7 +363,11 @@ " try:\n", " answer = model.get_response(messages)\n", " except Exception as e:\n", - " answer = \"Out of context\"\n", + " answers = {\n", + " index: \"Out of context\" for index in document_data.index\n", + " }\n", + " sections = {index: None for index in document_data.index}\n", + " return answers, sections\n", " logger.info(f\"Answer: {answer}\")\n", " answers[index] = answer\n", " sections[index] = None\n", @@ -343,15 +400,29 @@ "execution_count": 10, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 176, + "referenced_widgets": [ + "e529d49f260c4dd095a9025e15c4cedf", + "37fd0a08531d488abf6cf2c4efe77f91", + "cc95fba624e244a4bf86d7f3c44da644", + "78d7b4f191ff46088877d933a11da241", + "77e5b0640b064226a9de17039489c493", + "0cea8125daee456ca58658176f92e380", + "128fa55577b54565a824102717cf7365", + "a26ab1d05fe044d099e4a11ecc4d95e3", + "45f5c1e972db4eb2b1a163c85d8b3d6f", + "d7a9754f17764fbba485fa9c1176145b", + "604381fc2e1d4ddda28291ddad36edc4" + ] }, "id": "U4R84hHRHEgz", - "outputId": "d05aabac-d3b2-4c20-f810-dc31a40daac7" + "outputId": "828f5651-efa9-412a-e371-973753323984" }, "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: 
UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", @@ -360,6 +431,20 @@ "Please note that authentication is recommended but still optional to access public models or datasets.\n", " warnings.warn(\n" ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Qwen2.5-7B-Instruct-Q8_0.gguf: 0%| | 0.00/8.10G [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-02-03 14:27:35.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:27:35.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-02-03 14:27:35.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:27:35.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-02-03 14:28:23.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:28:23.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. 
-B: Europe. -C: North America.\u001b[0m\n", - "\u001b[32m2025-02-03 14:29:10.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:29:10.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-02-03 14:29:56.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:29:56.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-02-03 14:30:44.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:30:44.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? 
-A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-02-03 14:31:31.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:31:31.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-02-03 14:32:18.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:32:18.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-02-03 14:33:05.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:33:05.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? 
-A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-02-03 14:33:51.860\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:33:51.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-02-03 14:34:38.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:34:38.339\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:25.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:25.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:25.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:25.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:25.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:42.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:42.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:43.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:43.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:44.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:44.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:46.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:46.700\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:47.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 
YES\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:47.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:49.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:49.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:51.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:51.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:53.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B: NVIDIA P100\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:53.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:54.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:54.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:55.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 4000\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:55.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:57.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:57.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:57.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:57.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:35:57.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense 
layers?\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:19.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:19.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:26.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: -C: 3x\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:26.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:35.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:35.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:41.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:41.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:42.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - 
\u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:42.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:42.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:44.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:44.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:46.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:46.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:48.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:48.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:50.583\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:50.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:52.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:52.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:54.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:54.262\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:56.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:56.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:59.104\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:59.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:59.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:59.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:37:59.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:11.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:11.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:11.728\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:11.730\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:12.148\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:12.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:12.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 14\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:12.704\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:13.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:13.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:13.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:13.654\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2302.13971\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:13.796\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:13.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:38:13.801\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:33.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:33.563\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:34.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:34.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:36.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:36.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:37.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 0.1\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:37.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:40.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 20\u001b[0m\n", - 
"\u001b[32m2025-02-03 14:39:40.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:42.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:42.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:43.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:43.550\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:44.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:44.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:46.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:46.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading 
document https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:47.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:47.967\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:39:47.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:22.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:22.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:25.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:25.498\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? 
-A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:28.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:28.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:31.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:31.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:35.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:35.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:35.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:35.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:41:35.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:46.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:46.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:48.553\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:48.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:50.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:50.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:52.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:52.777\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access 
policy?\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:54.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 2015\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:54.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:55.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:55.818\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:56.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:56.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:42:56.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? 
-A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:01.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:01.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:08.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:08.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:13.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:13.710\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:20.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:20.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:20.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:20.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:20.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:38.753\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:38.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:57.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:43:57.887\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:15.828\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:15.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:34.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:34.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:52.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:44:52.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:10.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:10.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:29.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:29.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:47.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:45:47.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:06.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:06.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:24.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:24.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. 
-C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:42.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:46:42.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:01.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Out of context\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:01.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:01.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:01.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:01.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:05.851\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:05.853\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:06.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:06.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:06.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:06.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:07.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:07.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:08.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:08.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:09.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:09.283\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:10.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:10.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:10.604\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:10.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:11.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:11.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:11.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:11.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:12.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:12.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:12.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:12.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:13.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:13.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:13.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:13.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:14.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 7\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:14.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:15.005\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:15.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m27\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:15.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:27.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 4\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:27.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:28.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:28.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:29.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:29.584\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: 
How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:30.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:30.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:31.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:31.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:32.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:32.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:33.189\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:33.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:34.635\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:34.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:35.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:35.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:36.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:36.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:37.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:37.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:38.660\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-03 
14:47:38.662\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-02-03 14:47:39.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n" + "\u001b[32m2025-02-04 18:29:09.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-02-04 18:29:09.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:29:09.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", + "\u001b[32m2025-02-04 18:29:09.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:29:09.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. 
-C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-02-04 18:29:49.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", + "\u001b[32m2025-02-04 18:29:49.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:29:49.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:29:49.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:04.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:04.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:05.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:05.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:07.072\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:07.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:08.178\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:08.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:09.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:09.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:11.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:11.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:12.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:12.289\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:13.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B: NVIDIA P100\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:13.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:14.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:14.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:16.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 4000\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:16.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:17.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:17.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 
18:30:17.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:17.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:30:17.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:32.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:32.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:39.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: -C: 3x\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:39.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:46.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:46.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:52.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:52.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:53.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:53.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:53.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is Arithmetic 
reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:54.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:54.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:54.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:31:54.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:05.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:05.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:05.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:05.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:06.099\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:06.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:06.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 14\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:06.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:06.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:06.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:07.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:07.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2302.13971\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:07.474\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:07.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:32:07.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:22.189\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:22.191\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:23.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:23.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:24.564\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:24.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:26.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 0.1\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:26.186\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:28.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 20\u001b[0m\n", + 
"\u001b[32m2025-02-04 18:33:28.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:29.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:29.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:30.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:30.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:32.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:32.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:33.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:33.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading 
document https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:34.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:34.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:33:34.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:04.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:04.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:07.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:07.851\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? 
-A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:10.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:10.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:13.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:13.169\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:15.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:15.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:16.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:16.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:35:16.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:24.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:24.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:25.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:25.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:27.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:27.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:28.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:28.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access 
policy?\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:30.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 2015\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:30.191\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:32.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:32.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:33.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:33.053\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:33.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? 
-A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:37.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:38.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:38.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:38.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:54.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:54.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:54.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:54.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many chapters does 
the game last?\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:58.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:58.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:58.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:58.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:59.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:59.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:59.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:36:59.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:00.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:00.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:01.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:01.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:01.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:01.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:01.924\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:01.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:02.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:02.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:02.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:02.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:03.327\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:03.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:03.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:03.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:04.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:04.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:04.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:04.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:05.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 7\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:05.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:05.874\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:05.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:05.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:17.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 4\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:17.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:17.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:18.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:18.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:18.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: 
How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:19.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:19.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:21.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:21.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:21.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:21.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:22.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:22.707\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:23.455\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:23.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:24.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:24.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:25.033\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:25.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:25.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:25.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:26.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 
18:37:26.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-02-04 18:37:27.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n" ] } ], @@ -676,18 +697,216 @@ "height": 1000 }, "id": "3eW9TIKjHEgz", - "outputId": "6ea51282-b725-45cb-a898-08fcfb974a98" + "outputId": "344909f7-1fac-4810-f56f-3fd22452973f" }, "outputs": [ { + "output_type": "execute_result", "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results\",\n \"rows\": 48,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24,\n \"min\": 10,\n \"max\": 100,\n \"num_unique_values\": 48,\n \"samples\": [\n 72,\n 85,\n 71\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2302.13971\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 29,\n \"samples\": [\n \"Limitations of generative AI and LLMs\",\n \"5.2. Thread Hierarchy\",\n \"2.1. 
Toilets\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 48,\n \"samples\": [\n \"What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\",\n \"How many AI-related regulations were enacted in the United States in 2023?\",\n \"What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 13,\n \"samples\": [\n \"25\",\n \"C\",\n \"0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"NO\",\n \"5\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" - }, + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "17 17 https://arxiv.org/pdf/2106.09685.pdf \n", + "22 22 https://authorsalliance.org/wp-content/uploads... \n", + "24 24 https://authorsalliance.org/wp-content/uploads... 
\n", + "27 27 https://arxiv.org/pdf/2201.11903 \n", + "28 28 https://arxiv.org/pdf/2201.11903 \n", + "29 29 https://arxiv.org/pdf/2201.11903 \n", + "30 30 https://arxiv.org/pdf/2201.11903 \n", + "31 31 https://arxiv.org/pdf/2201.11903 \n", + "32 32 https://arxiv.org/pdf/2201.11903 \n", + "33 33 https://arxiv.org/pdf/2201.11903 \n", + "34 34 https://arxiv.org/pdf/2201.11903 \n", + "37 37 https://github.com/mozilla-ai/structured-qa/re... \n", + "39 39 https://github.com/mozilla-ai/structured-qa/re... \n", + "40 40 https://github.com/mozilla-ai/structured-qa/re... \n", + "41 41 https://github.com/mozilla-ai/structured-qa/re... \n", + "44 44 https://github.com/mozilla-ai/structured-qa/re... \n", + "62 62 https://github.com/mozilla-ai/structured-qa/re... \n", + "63 63 https://commission.europa.eu/document/download... \n", + "64 64 https://commission.europa.eu/document/download... \n", + "65 65 https://commission.europa.eu/document/download... \n", + "66 66 https://commission.europa.eu/document/download... \n", + "67 67 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "68 68 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "69 69 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "70 70 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "71 71 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "72 72 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "73 73 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "74 74 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "75 75 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "76 76 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "77 77 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "79 79 https://aiindex.stanford.edu/wp-content/upload... \n", + "80 80 https://aiindex.stanford.edu/wp-content/upload... \n", + "81 81 https://aiindex.stanford.edu/wp-content/upload... 
\n", + "82 82 https://aiindex.stanford.edu/wp-content/upload... \n", + "83 83 https://aiindex.stanford.edu/wp-content/upload... \n", + "84 84 https://aiindex.stanford.edu/wp-content/upload... \n", + "85 85 https://aiindex.stanford.edu/wp-content/upload... \n", + "86 86 https://aiindex.stanford.edu/wp-content/upload... \n", + "87 87 https://aiindex.stanford.edu/wp-content/upload... \n", + "88 88 https://aiindex.stanford.edu/wp-content/upload... \n", + "90 90 https://arxiv.org/pdf/2302.13971 \n", + "94 94 https://arxiv.org/pdf/2302.13971 \n", + "99 99 https://assets.publishing.service.gov.uk/media... \n", + "100 100 https://assets.publishing.service.gov.uk/media... \n", + "\n", + " type section \\\n", + "10 Scientific Paper 5.4 Regularization \n", + "17 Scientific Report 4 OUR METHOD \n", + "22 Techincal Documentation HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER? \n", + "24 Techincal Documentation OVERCOMING RESERVATIONS ABOUT OPEN ACCESS \n", + "27 Scientific Report 3 Arithmetic Reasoning \n", + "28 Scientific Report 3.1 Experimental Setup \n", + "29 Scientific Report 3.1 Experimental Setup \n", + "30 Scientific Report 5 Symbolic Reasoning \n", + "31 Scientific Report 5 Symbolic Reasoning \n", + "32 Scientific Report 5 Symbolic Reasoning \n", + "33 Scientific Report 3.4 Robustness of Chain of Thought \n", + "34 Scientific Report 3.2 Results \n", + "37 Board Game CARD AND TILE EFFECTS \n", + "39 Board Game CHAPTER OVERVIEW \n", + "40 Board Game CHAPTER OVERVIEW \n", + "41 Board Game CHAPTER OVERVIEW \n", + "44 Board Game CARD AND TILE EFFECTS \n", + "62 Board Game GAME END \n", + "63 Regulation 2.1. Toilets \n", + "64 Regulation CARBON MONOXIDE DETECTION AND VENTING \n", + "65 Regulation 4.1. Natural lighting \n", + "66 Regulation 1.2.1. Internal partitions and doors \n", + "67 Techincal Documentation 5.2. Thread Hierarchy \n", + "68 Techincal Documentation 5.2. Thread Hierarchy \n", + "69 Techincal Documentation 6.1.1. 
Compilation Workflow \n", + "70 Techincal Documentation 6.1.1. Compilation Workflow \n", + "71 Techincal Documentation 6.1.1. Compilation Workflow \n", + "72 Techincal Documentation 6.1.1. Compilation Workflow \n", + "73 Techincal Documentation 15.3. API Fundamentals \n", + "74 Techincal Documentation 15.3. API Fundamentals \n", + "75 Techincal Documentation 15.3. API Fundamentals \n", + "76 Techincal Documentation 15.3. API Fundamentals \n", + "77 Techincal Documentation 23.1. What is Lazy Loading? \n", + "78 Techincal Documentation 23.1. What is Lazy Loading? \n", + "79 Scientific Report Risk Perception \n", + "80 Scientific Report Risk Perception \n", + "81 Scientific Report Training Cost \n", + "82 Scientific Report Training Cost \n", + "83 Scientific Report LLM Tokenization Introduces Unfairness \n", + "84 Scientific Report LLM Tokenization Introduces Unfairness \n", + "85 Scientific Report U.S. Regulation \n", + "86 Scientific Report U.S. Regulation \n", + "87 Scientific Report Europe \n", + "88 Scientific Report Europe \n", + "90 Scientific Report 2.1 Pre-training Data \n", + "94 Scientific Report 3 Main results \n", + "99 Regulation Limitations of generative AI and LLMs \n", + "100 Regulation Procurement in an emerging market \n", + "\n", + " question answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 \n", + "17 Does LoRA work with any neural network contain... YES \n", + "22 how many peer-reviewed open access journals ar... A \n", + "24 Are publication fees required for all open acc... NO \n", + "27 Is Arithmetic reasoning is a task that languag... NO \n", + "28 How many large language models were evaluated? 5 \n", + "29 How many benchmarks were used to evaluate arit... 5 \n", + "30 Is symbolic reasoning usually simple for human... YES \n", + "31 How many words have the example names that the... B \n", + "32 Which symbolic reasoning task is used as an ou... A \n", + "33 How many annotators provided independent chain... 
3 \n", + "34 How many random samples were examined to under... 100 \n", + "37 How many different races are there? 6 \n", + "39 Can you take a Chapter card and a Landmark til... NO \n", + "40 How many goins does a player take when discard... 3 \n", + "41 After taking a landmark tile, do you reveal a ... NO \n", + "44 Can you use a symbol more than once per turn? NO \n", + "62 If player 1 has 30 Victory points and 4 worker... A \n", + "63 Which type of water must be supplied in a toil... B \n", + "64 In which type of parkings must a carbon monoxi... C \n", + "65 What percentage is the daylight factor require... A \n", + "66 What fire resistance must vertical partitions ... A \n", + "67 What is the maximum number of threads within a... 1024 \n", + "68 Can you identify a thread with a four-dimensio... NO \n", + "69 In the offline compilation process using nvcc,... B \n", + "70 What are the two ways the host code can be out... B \n", + "71 What is the primary purpose of just-in-time (J... C \n", + "72 What happens to the compiled binary code after... A \n", + "73 When are virtual addresses assigned to graph a... C \n", + "74 What do graph memory nodes represent in a CUDA... A \n", + "75 When does a graph allocation's lifetime end? -... B \n", + "76 How must operations accessing graph memory be ... C \n", + "77 What is the primary benefit of Lazy Loading? -... A \n", + "78 Can you enable lazy loading by setting the env... NO \n", + "79 which type of risk was identified as the leadi... B \n", + "80 In which geographical area were fairness risks... C \n", + "81 What is a major consequence of the rising trai... A \n", + "82 How the AI Index and Epoch AI estimated traini... C \n", + "83 What is a major source of inequality in AI rel... A \n", + "84 What are the three major inequalities resultin... B \n", + "85 How many AI-related regulations were enacted i... 25 \n", + "86 Which of the following was identified as a hig... 
B \n", + "87 Which country had the highest proportion of fe... B \n", + "88 Which countries reported the smallest proporti... C \n", + "90 How many languages did the Wikipedia data cover? 20 \n", + "94 Was the model compared against GPT-4? NO \n", + "99 Can LLMs be used as an alternative to visiting... NO \n", + "100 Which of the following is NOT mentioned as a r... C \n", + "\n", + " pred_answer pred_section \n", + "10 YES NaN \n", + "17 NO NaN \n", + "22 B NaN \n", + "24 I NEED MORE INFO NaN \n", + "27 OUT OF CONTEXT NaN \n", + "28 OUT OF CONTEXT NaN \n", + "29 OUT OF CONTEXT NaN \n", + "30 OUT OF CONTEXT NaN \n", + "31 OUT OF CONTEXT NaN \n", + "32 OUT OF CONTEXT NaN \n", + "33 OUT OF CONTEXT NaN \n", + "34 OUT OF CONTEXT NaN \n", + "37 5 NaN \n", + "39 I NEED MORE INFO NaN \n", + "40 I NEED MORE INFO NaN \n", + "41 YES NaN \n", + "44 YES NaN \n", + "62 C NaN \n", + "63 OUT OF CONTEXT NaN \n", + "64 OUT OF CONTEXT NaN \n", + "65 OUT OF CONTEXT NaN \n", + "66 OUT OF CONTEXT NaN \n", + "67 OUT OF CONTEXT NaN \n", + "68 OUT OF CONTEXT NaN \n", + "69 OUT OF CONTEXT NaN \n", + "70 OUT OF CONTEXT NaN \n", + "71 OUT OF CONTEXT NaN \n", + "72 OUT OF CONTEXT NaN \n", + "73 OUT OF CONTEXT NaN \n", + "74 OUT OF CONTEXT NaN \n", + "75 OUT OF CONTEXT NaN \n", + "76 OUT OF CONTEXT NaN \n", + "77 OUT OF CONTEXT NaN \n", + "78 OUT OF CONTEXT NaN \n", + "79 OUT OF CONTEXT NaN \n", + "80 OUT OF CONTEXT NaN \n", + "81 OUT OF CONTEXT NaN \n", + "82 OUT OF CONTEXT NaN \n", + "83 OUT OF CONTEXT NaN \n", + "84 OUT OF CONTEXT NaN \n", + "85 OUT OF CONTEXT NaN \n", + "86 OUT OF CONTEXT NaN \n", + "87 OUT OF CONTEXT NaN \n", + "88 OUT OF CONTEXT NaN \n", + "90 8 NaN \n", + "94 I NEED MORE INFO NaN \n", + "99 I NEED MORE INFO NaN \n", + "100 I NEED MORE INFO NaN " + ], "text/html": [ "\n", - "
\n", + "
\n", "
\n", "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.037625.4 RegularizationWhat was the dropout rate used for the base mo...0.1PDROP = 0.1NaN
4343https://arxiv.org/pdf/2201.119033.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...32NaN
4747https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?65NaN
5151https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
5252https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
6565https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEDo you need a fish to conquer a distant island?YESNONaN
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } ], - "text/plain": [ - " Unnamed: 0 document \\\n", - "10 10 https://arxiv.org/pdf/1706.03762 \n", - "43 43 https://arxiv.org/pdf/2201.11903 \n", - "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", - "51 51 https://github.com/mozilla-ai/structured-qa/re... \n", - "52 52 https://github.com/mozilla-ai/structured-qa/re... \n", - "65 65 https://github.com/mozilla-ai/structured-qa/re... \n", - "\n", - " section \\\n", - "10 5.4 Regularization \n", - "43 3.4 Robustness of Chain of Thought \n", - "47 CARD AND TILE EFFECTS \n", - "51 CHAPTER OVERVIEW \n", - "52 CARD AND TILE COSTS \n", - "65 EXPEDITION PHASE \n", - "\n", - " question answer pred_answer \\\n", - "10 What was the dropout rate used for the base mo... 0.1 PDROP = 0.1 \n", - "43 How many annotators provided independent chain... 3 2 \n", - "47 How many different races are there? 6 5 \n", - "51 After taking a landmark tile, do you reveal a ... NO YES \n", - "52 Can a player pay coins to compensate for missi... YES NO \n", - "65 Do you need a fish to conquer a distant island? 
YES NO \n", - "\n", - " pred_section \n", - "10 NaN \n", - "43 NaN \n", - "47 NaN \n", - "51 NaN \n", - "52 NaN \n", - "65 NaN " + "source": [ + "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "for index, result in results.iterrows():\n", - " if result[\"pred_answer\"].startswith(\n", - " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", - " ):\n", - " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "wfz1XQDLlyha", - "outputId": "e2539d58-338a-4b59-87ae-ed66c480b015" - }, - "outputs": [ { - "data": { - "text/plain": [ - "0.9393939393939394" + "cell_type": "markdown", + "metadata": { + "id": "qwHWJEsulyhV" + }, + "source": [ + "# Setup" ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "rjMNQp8-sZn9" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.12" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "0b95e6210f4f4788a718c652cd5593c4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } }, - "3459132c36e6472ba415aca0de6fe3e5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "iJ812u2llyhV" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] }, - "397f1a99a7cf41599453f936b207a44a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3459132c36e6472ba415aca0de6fe3e5", - "placeholder": "​", - "style": "IPY_MODEL_0b95e6210f4f4788a718c652cd5593c4", - "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" - } + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "jWlaKC5qXZrh" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] }, - "87bfd1ae753e4586a139a988ae2c4601": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "cell_type": "markdown", + "metadata": { + "id": "MKijHC_ClyhX" + }, + "source": [ + "## Function to Process all questions for a single Section" + ] }, - "8ca481ba99984fb2b9d87f02813c7408": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "oFU-eYMVlyhX" + }, + "outputs": [], + "source": [ + "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You must only answer based on the current information available which is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + 
"```\n", + "\n", + "If the current information available not enough to answer the question,\n", + "you must return \"I need more info\" srting and nothing else:\n", + "\n", + "If the current information is enough to answer, you must return one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "- Number (for numeric questions)\n", + "- Single letter (for multiple-choice questions)\n", + "\"\"\"\n", + "\n", + "\n", + "def process_section_questions(\n", + " section_file,\n", + " section_data,\n", + " model,\n", + "):\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in section_data.iterrows():\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " messages = [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": ANSWER_WITH_TYPE_PROMPT.format(\n", + " CURRENT_INFO=section_file.read_text()\n", + " ),\n", + " },\n", + " {\"role\": \"user\", \"content\": question},\n", + " ]\n", + " response = model.get_response(messages)\n", + " logger.info(f\"Answer: {response}\")\n", + " answers[index] = response\n", + " sections[index] = None\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VQAof5xtlyhY" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "6RoEbYj3XZri" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_llama_cpp_model" + ] }, - "970fb4579dab4f0e96f2efe052f0a463": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - 
"IPY_MODEL_397f1a99a7cf41599453f936b207a44a", - "IPY_MODEL_e80453d93a614c5885d0a8270db53e7a", - "IPY_MODEL_dc3fdea0c6cf45239f995b029219b664" + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 156, + "referenced_widgets": [ + "edf16c851ed847a483d3cbc2022bc3aa", + "02809691f37345feb9027bb55c58c50b", + "92a91f686ac44ca39056f1ee95448c64", + "ebd54259bb264e72bc12dc33ececa0d3", + "44715f975fc441cfba941f061d728cfd", + "4f518be579c64f9a82bbd0db44c65492", + "7fbb4559c29547e19ade206b66e3f6c4", + "4dbddc95288342e38aac3f84a48eaf5d", + "57a78a6a8cd244c989bc1273c73a9f71", + "33ba4f58a45644a0a3ade33cd0add51d", + "fc63523a46b44a63820335ec4976a246" + ] + }, + "id": "ObsvwlNslyhZ", + "outputId": "50fbe86d-075b-46b5-ca45-5b24eabaf753" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Qwen2.5-7B-Instruct-Q8_0.gguf: 0%| | 0.00/8.10G [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:42.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:42.956\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:44.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:44.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:44.882\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:44.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:44.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:45.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A: EI30\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:45.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:45.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:45.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:46.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:46.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:46.169\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. 
-B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:46.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:46.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:46.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:46.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:46.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:48.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A: 4.5%\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:48.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:48.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 20\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:48.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:48.537\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:48.789\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:48.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:48.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:49.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:49.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:49.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 0.1\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:49.379\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:49.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:50.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:50.053\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:50.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. 
-C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:50.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:50.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:50.786\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:50.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:50.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:51.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:51.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:51.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:52.217\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:52.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:52.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:52.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:52.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 14\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:52.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:52.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:52.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:52.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:52.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:52.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:53.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 20\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:53.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:53.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:53.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:53.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:53.858\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:53.860\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:53.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:54.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:54.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:54.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:54.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:54.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:55.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - 
\u001b[1mAnswer: Five\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:55.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:55.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:55.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:55.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:56.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 100\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:56.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:56.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:56.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:56.546\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:56.550\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:56.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:56.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:56.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:57.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:57.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:57.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:57.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:57.753\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:57.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:57.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:57.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:57.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:58.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:58.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:58.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:58.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:58.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:58.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:58.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:58.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:59.328\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:59.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:59.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:59.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:59.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:59.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:01:59.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:00.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:00.210\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:00.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:00.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:00.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:00.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:00.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:00.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:01.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 1024\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:01.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional 
index?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:01.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:01.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:01.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:02.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:02.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:02.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 4000\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:02.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:02.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:02.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - 
\u001b[1mAnswer: Pdrop = 0.1\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:02.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:02.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:03.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:03.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:03.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:03.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:03.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:03.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:03.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:03.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:03.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:04.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:04.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:04.485\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:04.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:04.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:05.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:05.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:05.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:05.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:05.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:05.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:05.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:05.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:06.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:06.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the 
reserve?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:06.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:06.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:06.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:06.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:06.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:06.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:06.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:06.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:07.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:07.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:07.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:07.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:07.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:07.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:07.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.181\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.398\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:08.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:09.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:09.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:09.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:09.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:09.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", + 
"\u001b[32m2025-02-04 18:02:09.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 7\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:09.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:09.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:09.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:09.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:10.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:10.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:10.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:10.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:10.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:10.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:10.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:10.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:10.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:10.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:11.186\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:11.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:11.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:11.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:11.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? 
-A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:12.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:12.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:12.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:12.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:12.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:12.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:12.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:12.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:13.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:13.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:13.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:13.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:13.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout 
phase?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:14.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 4\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:14.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:14.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:14.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:14.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:14.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:14.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:14.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? 
-A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:15.465\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:15.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:15.584\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:15.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:15.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:17.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:17.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:17.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:17.299\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:17.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:17.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:17.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:17.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? 
-A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:17.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:17.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:17.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:18.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:18.178\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:18.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. 
-C: Risks related to generative AI deployment.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:18.633\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:18.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:18.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:18.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:18.752\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:19.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:19.558\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:19.663\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:19.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:19.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:20.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:20.266\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. 
-B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:20.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:20.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:20.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:21.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:21.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. 
-C: Regulations related to foreign trade and international finance\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:21.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:21.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:21.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", + "\u001b[32m2025-02-04 18:02:21.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 2015\u001b[0m\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured-qa/benchmark/structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for section_name, section_data in data.groupby(\"section\"):\n", + " section_file = Path(f\"structured-qa/benchmark/perfect_context/{section_name}.txt\")\n", + "\n", + " answers, sections = process_section_questions(section_file, section_data, model)\n", + "\n", + " for index in section_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a9eMkW1-lyha" + }, + "source": [ + "# Results" + ] }, - "dc3fdea0c6cf45239f995b029219b664": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c31f992a506241ea94e254189dc696f1", - "placeholder": "​", - "style": "IPY_MODEL_87bfd1ae753e4586a139a988ae2c4601", - "value": " 8.10G/8.10G [03:12<00:00, 41.5MB/s]" - } + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "EYYJgWf6lyha", + "outputId": "49497ef0-fb43-47c7-a1e4-065d11fa0d08" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "28 28 https://arxiv.org/pdf/2201.11903 \n", + "32 32 https://arxiv.org/pdf/2201.11903 \n", + "37 37 https://github.com/mozilla-ai/structured-qa/re... \n", + "41 41 https://github.com/mozilla-ai/structured-qa/re... \n", + "42 42 https://github.com/mozilla-ai/structured-qa/re... \n", + "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", + "58 58 https://github.com/mozilla-ai/structured-qa/re... \n", + "68 68 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "94 94 https://arxiv.org/pdf/2302.13971 \n", + "\n", + " type section \\\n", + "10 Scientific Paper 5.4 Regularization \n", + "28 Scientific Report 3.1 Experimental Setup \n", + "32 Scientific Report 5 Symbolic Reasoning \n", + "37 Board Game CARD AND TILE EFFECTS \n", + "41 Board Game CHAPTER OVERVIEW \n", + "42 Board Game CARD AND TILE COSTS \n", + "55 Board Game EXPEDITION PHASE \n", + "58 Board Game LOCATION ABILITIES \n", + "68 Techincal Documentation 5.2. 
Thread Hierarchy \n", + "94 Scientific Report 3 Main results \n", + "\n", + " question answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 \n", + "28 How many large language models were evaluated? 5 \n", + "32 Which symbolic reasoning task is used as an ou... A \n", + "37 How many different races are there? 6 \n", + "41 After taking a landmark tile, do you reveal a ... NO \n", + "42 Can a player pay coins to compensate for missi... YES \n", + "55 Do you need a fish to conquer a distant island? YES \n", + "58 How many victory points are granted by a built... 1 \n", + "68 Can you identify a thread with a four-dimensio... NO \n", + "94 Was the model compared against GPT-4? NO \n", + "\n", + " pred_answer pred_section \n", + "10 PDROP = 0.1 NaN \n", + "28 FIVE NaN \n", + "32 I NEED MORE INFO NaN \n", + "37 5 NaN \n", + "41 YES NaN \n", + "42 NO NaN \n", + "55 NO NaN \n", + "58 I NEED MORE INFO NaN \n", + "68 I NEED MORE INFO NaN \n", + "94 I NEED MORE INFO NaN " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documenttypesectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.03762Scientific Paper5.4 RegularizationWhat was the dropout rate used for the base mo...0.1PDROP = 0.1NaN
2828https://arxiv.org/pdf/2201.11903Scientific Report3.1 Experimental SetupHow many large language models were evaluated?5FIVENaN
3232https://arxiv.org/pdf/2201.11903Scientific Report5 Symbolic ReasoningWhich symbolic reasoning task is used as an ou...AI NEED MORE INFONaN
3737https://github.com/mozilla-ai/structured-qa/re...Board GameCARD AND TILE EFFECTSHow many different races are there?65NaN
4141https://github.com/mozilla-ai/structured-qa/re...Board GameCHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
4242https://github.com/mozilla-ai/structured-qa/re...Board GameCARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNONaN
5555https://github.com/mozilla-ai/structured-qa/re...Board GameEXPEDITION PHASEDo you need a fish to conquer a distant island?YESNONaN
5858https://github.com/mozilla-ai/structured-qa/re...Board GameLOCATION ABILITIESHow many victory points are granted by a built...1I NEED MORE INFONaN
6868https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation5.2. Thread HierarchyCan you identify a thread with a four-dimensio...NOI NEED MORE INFONaN
9494https://arxiv.org/pdf/2302.13971Scientific Report3 Main resultsWas the model compared against GPT-4?NOI NEED MORE INFONaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"results\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 23,\n \"min\": 10,\n \"max\": 94,\n \"num_unique_values\": 10,\n \"samples\": [\n 68,\n 28,\n 42\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/2302.13971\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Scientific Report\",\n \"Techincal Documentation\",\n \"Scientific Paper\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"5.2. 
Thread Hierarchy\",\n \"3.1 Experimental Setup\",\n \"CARD AND TILE COSTS\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Can you identify a thread with a four-dimensional index?\",\n \"How many large language models were evaluated?\",\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"0.1\",\n \"5\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"PDROP = 0.1\",\n \"FIVE\",\n \"NO\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " if result[\"pred_answer\"].startswith(\n", + " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", + " ):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wfz1XQDLlyha", + "outputId": "e26fc721-839a-4e86-eeaa-873628254489" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9029126213592233" + ] + }, + "metadata": {}, + 
"execution_count": 11 + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] }, - "de44370268a441c48c7b81fc0b138c22": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "e80453d93a614c5885d0a8270db53e7a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": 
"ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_de44370268a441c48c7b81fc0b138c22", - "max": 8098525888, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_8ca481ba99984fb2b9d87f02813c7408", - "value": 8098525888 - } + "language_info": { + "name": "python", + "version": "3.10.12" }, - "f3534030a918419fa1687b6407ff002e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "edf16c851ed847a483d3cbc2022bc3aa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_02809691f37345feb9027bb55c58c50b", + "IPY_MODEL_92a91f686ac44ca39056f1ee95448c64", + "IPY_MODEL_ebd54259bb264e72bc12dc33ececa0d3" + ], + "layout": "IPY_MODEL_44715f975fc441cfba941f061d728cfd" + } + }, + "02809691f37345feb9027bb55c58c50b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4f518be579c64f9a82bbd0db44c65492", + "placeholder": "​", + "style": "IPY_MODEL_7fbb4559c29547e19ade206b66e3f6c4", + "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" + } + }, + "92a91f686ac44ca39056f1ee95448c64": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4dbddc95288342e38aac3f84a48eaf5d", + "max": 8098525888, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_57a78a6a8cd244c989bc1273c73a9f71", + "value": 8098525888 + } + }, + "ebd54259bb264e72bc12dc33ececa0d3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_33ba4f58a45644a0a3ade33cd0add51d", + "placeholder": "​", + "style": "IPY_MODEL_fc63523a46b44a63820335ec4976a246", + "value": " 8.10G/8.10G [03:12<00:00, 41.7MB/s]" + } + }, + "44715f975fc441cfba941f061d728cfd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4f518be579c64f9a82bbd0db44c65492": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7fbb4559c29547e19ade206b66e3f6c4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4dbddc95288342e38aac3f84a48eaf5d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "57a78a6a8cd244c989bc1273c73a9f71": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "33ba4f58a45644a0a3ade33cd0add51d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fc63523a46b44a63820335ec4976a246": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } } - } - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 476bbe1e43ed99b8a84f7e1e816d2ecf1883830c Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 5 Feb 2025 16:10:23 +0100 Subject: [PATCH 115/120] Bring back llama-cpp-python --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index b77ed97..545d102 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dynamic = ["version"] dependencies = [ "fire", "huggingface-hub", + "llama-cpp-python", "loguru", "pydantic", "pymupdf4llm", From fdafdc35e11570a910406a7985f4eb699d383e2d Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 5 Feb 2025 16:12:21 +0100 Subject: [PATCH 116/120] Update prompts --- src/structured_qa/config.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/structured_qa/config.py b/src/structured_qa/config.py index e160b67..b0f02f2 100644 --- a/src/structured_qa/config.py +++ b/src/structured_qa/config.py @@ -27,20 +27,19 @@ ANSWER_PROMPT = """ You are a rigorous 
assistant answering questions. -You only answer based on the current information available. - -The current information available is: +You must only answer based on the current information available which is: ``` {CURRENT_INFO} ``` If the current information available not enough to answer the question, -you must return the following message and nothing else: +you must return "I need more info" and nothing else. -``` -I need more info. -``` +If the current information is enough to answer, you must return one of the following formats: +- YES/NO (for boolean questions) +- Number (for numeric questions) +- Single letter (for multiple-choice questions) """ From 2ac1f618a3e16fb14376d9177c4da6d3a78b7528 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 5 Feb 2025 16:34:37 +0100 Subject: [PATCH 117/120] Reduce notebook size --- .pre-commit-config.yaml | 2 +- benchmark/gemini_RAGatouille.ipynb | 3034 ++++--------- benchmark/gemini_find_retrieve_answer.ipynb | 1761 +------- benchmark/gemini_full_context.ipynb | 490 +-- benchmark/gemini_perfect_context.ipynb | 292 +- benchmark/qwen_2_5_7B_RAGatouille.ipynb | 3581 ++++++---------- .../qwen_2_5_7B_find_retrieve_answer.ipynb | 837 +++- benchmark/qwen_2_5_7B_full_context.ipynb | 3776 ++++++++--------- benchmark/qwen_2_5_7B_perfect_context.ipynb | 776 +--- 9 files changed, 4692 insertions(+), 9857 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6692227..41710a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,4 +25,4 @@ repos: rev: v2.3.0 hooks: - id: codespell - exclude: CODE_OF_CONDUCT.md + exclude: CODE_OF_CONDUCT.md|benchmark/* diff --git a/benchmark/gemini_RAGatouille.ipynb b/benchmark/gemini_RAGatouille.ipynb index cb81212..e6e2a90 100644 --- a/benchmark/gemini_RAGatouille.ipynb +++ b/benchmark/gemini_RAGatouille.ipynb @@ -63,8 +63,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "GPU is available!\n" ] @@ -90,7 
+90,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -98,245 +98,14 @@ "id": "P1eAychVq3my", "outputId": "088fe47d-7bf6-42c9-f538-e0052369aff7" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: ragatouille in /usr/local/lib/python3.11/dist-packages (0.0.8.post4)\n", - "Requirement already satisfied: PyPDF2 in /usr/local/lib/python3.11/dist-packages (3.0.1)\n", - "Requirement already satisfied: colbert-ai==0.2.19 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.2.19)\n", - "Requirement already satisfied: faiss-cpu<2.0.0,>=1.7.4 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (1.10.0)\n", - "Requirement already satisfied: fast-pytorch-kmeans==0.2.0.1 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.2.0.1)\n", - "Requirement already satisfied: langchain>=0.1.0 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.16)\n", - "Requirement already satisfied: langchain_core>=0.1.4 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.3.32)\n", - "Requirement already satisfied: llama-index>=0.7 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (0.12.15)\n", - "Requirement already satisfied: onnx<2.0.0,>=1.15.0 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (1.17.0)\n", - "Requirement already satisfied: sentence-transformers<3.0.0,>=2.2.2 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (2.7.0)\n", - "Requirement already satisfied: srsly==2.4.8 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (2.4.8)\n", - "Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (2.5.1+cu124)\n", - "Requirement already satisfied: transformers<5.0.0,>=4.36.2 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (4.47.1)\n", - "Requirement already 
satisfied: voyager<3.0.0,>=2.0.2 in /usr/local/lib/python3.11/dist-packages (from ragatouille) (2.1.0)\n", - "Requirement already satisfied: bitarray in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (3.0.0)\n", - "Requirement already satisfied: datasets in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (3.2.0)\n", - "Requirement already satisfied: flask in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (3.1.0)\n", - "Requirement already satisfied: git-python in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (1.0.3)\n", - "Requirement already satisfied: python-dotenv in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (1.0.1)\n", - "Requirement already satisfied: ninja in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (1.11.1.3)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (1.13.1)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (4.67.1)\n", - "Requirement already satisfied: ujson in /usr/local/lib/python3.11/dist-packages (from colbert-ai==0.2.19->ragatouille) (5.10.0)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from fast-pytorch-kmeans==0.2.0.1->ragatouille) (1.26.4)\n", - "Requirement already satisfied: pynvml in /usr/local/lib/python3.11/dist-packages (from fast-pytorch-kmeans==0.2.0.1->ragatouille) (12.0.0)\n", - "Requirement already satisfied: catalogue<2.1.0,>=2.0.3 in /usr/local/lib/python3.11/dist-packages (from srsly==2.4.8->ragatouille) (2.0.10)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from faiss-cpu<2.0.0,>=1.7.4->ragatouille) (24.2)\n", - "Requirement already satisfied: PyYAML>=5.3 in 
/usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (6.0.2)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.0.37)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (3.11.11)\n", - "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.3.5)\n", - "Requirement already satisfied: langsmith<0.4,>=0.1.17 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (0.3.2)\n", - "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.10.6)\n", - "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (2.32.3)\n", - "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain>=0.1.0->ragatouille) (9.0.0)\n", - "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from langchain_core>=0.1.4->ragatouille) (1.33)\n", - "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.11/dist-packages (from langchain_core>=0.1.4->ragatouille) (4.12.2)\n", - "Requirement already satisfied: llama-index-agent-openai<0.5.0,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (0.4.3)\n", - "Requirement already satisfied: llama-index-cli<0.5.0,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (0.4.0)\n", - "Requirement already satisfied: llama-index-core<0.13.0,>=0.12.15 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (0.12.15)\n", - "Requirement already satisfied: 
llama-index-embeddings-openai<0.4.0,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (0.3.1)\n", - "Requirement already satisfied: llama-index-indices-managed-llama-cloud>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (0.6.4)\n", - "Requirement already satisfied: llama-index-llms-openai<0.4.0,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (0.3.17)\n", - "Requirement already satisfied: llama-index-multi-modal-llms-openai<0.5.0,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (0.4.3)\n", - "Requirement already satisfied: llama-index-program-openai<0.4.0,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (0.3.1)\n", - "Requirement already satisfied: llama-index-question-gen-openai<0.4.0,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (0.3.0)\n", - "Requirement already satisfied: llama-index-readers-file<0.5.0,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (0.4.4)\n", - "Requirement already satisfied: llama-index-readers-llama-parse>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (0.4.0)\n", - "Requirement already satisfied: nltk>3.8.1 in /usr/local/lib/python3.11/dist-packages (from llama-index>=0.7->ragatouille) (3.9.1)\n", - "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.11/dist-packages (from onnx<2.0.0,>=1.15.0->ragatouille) (4.25.6)\n", - "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (1.6.1)\n", - "Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (0.27.1)\n", - "Requirement already satisfied: Pillow in 
/usr/local/lib/python3.11/dist-packages (from sentence-transformers<3.0.0,>=2.2.2->ragatouille) (11.1.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.17.0)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.5)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2024.9.0)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.4.127)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.4.127)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.4.127)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (9.1.0.70)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.4.5.8)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (11.2.1.3)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (10.3.5.147)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (11.6.1.9)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from 
torch>=1.13->ragatouille) (12.3.1.170)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (2.21.5)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.4.127)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (12.4.127)\n", - "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (3.1.0)\n", - "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.13->ragatouille) (1.13.1)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=1.13->ragatouille) (1.3.0)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (2024.11.6)\n", - "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.21.0)\n", - "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.36.2->ragatouille) (0.5.2)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (2.4.4)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.3.2)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (25.1.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages 
(from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.5.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (6.1.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (0.2.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.1.0->ragatouille) (1.18.3)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain_core>=0.1.4->ragatouille) (3.0.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.28.1)\n", - "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (3.10.15)\n", - "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.0)\n", - "Requirement already satisfied: zstandard<0.24.0,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.23.0)\n", - "Requirement already satisfied: openai>=1.14.0 in /usr/local/lib/python3.11/dist-packages (from llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.59.9)\n", - "Requirement already satisfied: dataclasses-json in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.15->llama-index>=0.7->ragatouille) (0.6.7)\n", - "Requirement already satisfied: deprecated>=1.2.9.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.15->llama-index>=0.7->ragatouille) (1.2.18)\n", - 
"Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.15->llama-index>=0.7->ragatouille) (1.0.8)\n", - "Requirement already satisfied: filetype<2.0.0,>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.15->llama-index>=0.7->ragatouille) (1.2.0)\n", - "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.15->llama-index>=0.7->ragatouille) (1.6.0)\n", - "Requirement already satisfied: tiktoken>=0.3.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.15->llama-index>=0.7->ragatouille) (0.8.0)\n", - "Requirement already satisfied: typing-inspect>=0.8.0 in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.15->llama-index>=0.7->ragatouille) (0.9.0)\n", - "Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from llama-index-core<0.13.0,>=0.12.15->llama-index>=0.7->ragatouille) (1.17.2)\n", - "Requirement already satisfied: llama-cloud<0.2.0,>=0.1.8 in /usr/local/lib/python3.11/dist-packages (from llama-index-indices-managed-llama-cloud>=0.4.0->llama-index>=0.7->ragatouille) (0.1.11)\n", - "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (4.12.3)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.2.2)\n", - "Requirement already satisfied: pypdf<6.0.0,>=5.1.0 in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (5.2.0)\n", - "Requirement already satisfied: striprtf<0.0.27,>=0.0.26 in /usr/local/lib/python3.11/dist-packages (from 
llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (0.0.26)\n", - "Requirement already satisfied: llama-parse>=0.5.0 in /usr/local/lib/python3.11/dist-packages (from llama-index-readers-llama-parse>=0.4.0->llama-index>=0.7->ragatouille) (0.5.20)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (8.1.8)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk>3.8.1->llama-index>=0.7->ragatouille) (1.4.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain>=0.1.0->ragatouille) (2.27.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain>=0.1.0->ragatouille) (2024.12.14)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from SQLAlchemy<3,>=1.4->langchain>=0.1.0->ragatouille) (3.1.1)\n", - "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets->colbert-ai==0.2.19->ragatouille) (17.0.0)\n", - "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from 
datasets->colbert-ai==0.2.19->ragatouille) (0.3.8)\n", - "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets->colbert-ai==0.2.19->ragatouille) (3.5.0)\n", - "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets->colbert-ai==0.2.19->ragatouille) (0.70.16)\n", - "Requirement already satisfied: Werkzeug>=3.1 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (3.1.3)\n", - "Requirement already satisfied: itsdangerous>=2.2 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (2.2.0)\n", - "Requirement already satisfied: blinker>=1.9 in /usr/local/lib/python3.11/dist-packages (from flask->colbert-ai==0.2.19->ragatouille) (1.9.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=1.13->ragatouille) (3.0.2)\n", - "Requirement already satisfied: gitpython in /usr/local/lib/python3.11/dist-packages (from git-python->colbert-ai==0.2.19->ragatouille) (3.1.44)\n", - "Requirement already satisfied: nvidia-ml-py<13.0.0a0,>=12.0.0 in /usr/local/lib/python3.11/dist-packages (from pynvml->fast-pytorch-kmeans==0.2.0.1->ragatouille) (12.570.86)\n", - "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->sentence-transformers<3.0.0,>=2.2.2->ragatouille) (3.5.0)\n", - "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.11/dist-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.6)\n", - "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (3.7.1)\n", - "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from 
httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (1.0.7)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain>=0.1.0->ragatouille) (0.14.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.9.0)\n", - "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (0.8.2)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.3.1)\n", - "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from typing-inspect>=0.8.0->llama-index-core<0.13.0,>=0.12.15->llama-index>=0.7->ragatouille) (1.0.0)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /usr/local/lib/python3.11/dist-packages (from dataclasses-json->llama-index-core<0.13.0,>=0.12.15->llama-index>=0.7->ragatouille) (3.26.1)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython->git-python->colbert-ai==0.2.19->ragatouille) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from 
pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (2025.1)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython->git-python->colbert-ai==0.2.19->ragatouille) (5.0.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->llama-index-readers-file<0.5.0,>=0.4.0->llama-index>=0.7->ragatouille) (1.17.0)\n" - ] - } - ], - "source": [ - "%pip install ragatouille PyPDF2" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "I0dl5xGnq3my", - "outputId": "8b3f9944-46f3-423e-e5f4-7738edec5966" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-v6q9_weu\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-v6q9_weu\n", - " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", - " Switched to a new branch '5-add-benchmark'\n", - " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 97049d67d83ec6129569d442bd365c7a5e490578\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.7.0)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.27.1)\n", - "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.7.3)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (2.10.6)\n", - "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.0.17)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (6.0.2)\n", - "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (3.12.1)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (1.41.1)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev111+g97049d6) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2024.9.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2.32.3)\n", - "Requirement 
already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev111+g97049d6) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev111+g97049d6) (2.27.2)\n", - "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev111+g97049d6) (1.25.2)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (11.1.0)\n", - "Requirement already satisfied: 
protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.10.2)\n", - "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (6.0.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.1.44)\n", - "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.9.1)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (6.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.24.1)\n", - "Requirement already satisfied: 
gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.0.2)\n", - 
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (25.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.17.0)\n" - ] - } - ], + "outputs": [], "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + "%pip install --quiet PyPDF2 ragatouille structured-qa" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -344,25 +113,7 @@ "id": "Nl_haxghq3mz", "outputId": "773b3a25-285b-408a-bc9d-f490576f91fb" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2025-02-04 15:27:16-- 
https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 23304 (23K) [text/plain]\n", - "Saving to: ‘structured_qa.csv.1’\n", - "\n", - "structured_qa.csv.1 100%[===================>] 22.76K --.-KB/s in 0.001s \n", - "\n", - "2025-02-04 15:27:17 (28.5 MB/s) - ‘structured_qa.csv.1’ saved [23304/23304]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" ] @@ -443,7 +194,6 @@ }, "outputs": [], "source": [ - "import json\n", "import time\n", "\n", "from ragatouille import RAGPretrainedModel\n", @@ -556,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -644,1241 +394,53 @@ "id": "W9r17Rz3q3m1", "outputId": "5ffcccdc-9569-4ec4-c14e-204994917725" }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\u001b[32m2025-02-04 15:27:45.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-02-04 15:27:45.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:27:45.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded 
https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf to HAI_AI-Index-Report-2024.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:27:45.381\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "artifact.metadata: 0%| | 0.00/1.63k [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-02-04 15:29:56.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:29:56.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 56 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:31:13.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:31:13.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 137 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-02-04 15:31:27.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:31:27.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 199 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/7 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-02-04 15:32:41.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:32:41.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 44 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2302.13971\u001b[0m\n", - "\u001b[32m2025-02-04 15:33:50.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:33:50.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 144 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:35:07.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:35:07.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 168 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/6 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:35:16.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:35:16.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 143 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:36:28.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:36:28.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 364 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/12 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:36:40.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:36:40.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 1803 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\r 0%| | 0/57 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:39:25.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:39:25.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 17 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/1 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:40:48.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 15:40:48.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 48 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/2 [00:00Discard any remaining, face-up Island cards and reveal new ones.\n", - " >Pass the First player marker to \u001b[0m\n", - "\u001b[32m2025-02-04 15:41:55.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m25\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 15:41:55.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-02-04 15:41:55.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m>There is no limit to the number, type, or order of \n", - "actions a player may take during the Action pha\u001b[0m\n", - "\u001b[32m2025-02-04 15:41:57.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mNO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 15:41:57.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-02-04 15:41:57.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mRations are needed for the long journey!\n", - "A player can choose to Pillage a selected Island card with\u001b[0m\n", - "\u001b[32m2025-02-04 15:41:58.281\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 15:41:58.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-02-04 15:41:58.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mEach \n", - "action draws the clans closer to becoming the greatest empire! The \n", - "game ends in the same roun\u001b[0m\n", - "\u001b[32m2025-02-04 15:42:00.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m1\u001b[0m\n", - "\u001b[32m2025-02-04 15:42:00.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 15:42:00.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mGAME FLOW\n", - "Note for Imperial Settlers fans \n", - "You cannot Spend 2 Workers \n", - "to get a Resource or a card.\u001b[0m\n", - "\u001b[32m2025-02-04 15:42:01.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mYES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 15:42:01.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-02-04 15:42:01.439\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mIMPORT ANT: Some Field Locations work only as upgrades. These Fields have \n", - "the Resources on the righ\u001b[0m\n", - "\u001b[32m2025-02-04 15:42:02.552\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1m1\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 15:42:02.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:02.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:02.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNOTE 2: Some abilities in the \n", - "game have a ‘/’ divider between \n", - "presented choices. 
This should be \n", - "t\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:04.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mYES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:04.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:04.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mThus allowing a player to play \n", - "a single Boost card or build a single Field Location before resolvin\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:05.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mNO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:05.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:05.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", - "(Gold tokens assigned to cards are\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:07.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mYES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:07.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:07.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", - "(Gold tokens assigned to cards are\u001b[0m\n", - "\u001b[32m2025-02-04 15:43:08.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mA\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "for document_link, document_data in data.groupby(\"document\"):\n", - " logger.info(f\"Downloading document {document_link}\")\n", - " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " if not Path(downloaded_document).exists():\n", - " urlretrieve(document_link, downloaded_document)\n", - " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", - " else:\n", - " logger.info(f\"File {downloaded_document} already exists\")\n", - "\n", - " answers, sections = process_document(downloaded_document, document_data, model)\n", - "\n", - " for index in document_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 489 - }, - "id": "mltqL7Bhq3m1", - "outputId": "2ff97f53-d4f1-45a4-856e-2d1fff12a819" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " Unnamed: 0 
document \\\n", - "10 10 https://arxiv.org/pdf/1706.03762 \n", - "26 26 https://authorsalliance.org/wp-content/uploads... \n", - "28 28 https://arxiv.org/pdf/2201.11903 \n", - "29 29 https://arxiv.org/pdf/2201.11903 \n", - "33 33 https://arxiv.org/pdf/2201.11903 \n", - "34 34 https://arxiv.org/pdf/2201.11903 \n", - "37 37 https://github.com/mozilla-ai/structured-qa/re... \n", - "42 42 https://github.com/mozilla-ai/structured-qa/re... \n", - "45 45 https://github.com/mozilla-ai/structured-qa/re... \n", - "57 57 https://github.com/mozilla-ai/structured-qa/re... \n", - "73 73 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "92 92 https://arxiv.org/pdf/2302.13971 \n", - "98 98 https://assets.publishing.service.gov.uk/media... \n", - "\n", - " type \\\n", - "10 Scientific Paper \n", - "26 Techincal Documentation \n", - "28 Scientific Report \n", - "29 Scientific Report \n", - "33 Scientific Report \n", - "34 Scientific Report \n", - "37 Board Game \n", - "42 Board Game \n", - "45 Board Game \n", - "57 Board Game \n", - "73 Techincal Documentation \n", - "78 Techincal Documentation \n", - "92 Scientific Report \n", - "98 Regulation \n", - "\n", - " section \\\n", - "10 5.4 Regularization \n", - "26 CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK... \n", - "28 3.1 Experimental Setup \n", - "29 3.1 Experimental Setup \n", - "33 3.4 Robustness of Chain of Thought \n", - "34 3.2 Results \n", - "37 CARD AND TILE EFFECTS \n", - "42 CARD AND TILE COSTS \n", - "45 CARD AND TILE EFFECTS \n", - "57 CLEANUP PHASE \n", - "73 15.3. API Fundamentals \n", - "78 23.1. What is Lazy Loading? \n", - "92 2.3 Optimizer \n", - "98 Limitations of generative AI and LLMs \n", - "\n", - " question answer \\\n", - "10 What was the dropout rate used for the base mo... 0.1 \n", - "26 Are Gold Open Access and Green Open Access mut... NO \n", - "28 How many large language models were evaluated? 
5 \n", - "29 How many benchmarks were used to evaluate arit... 5 \n", - "33 How many annotators provided independent chain... 3 \n", - "34 How many random samples were examined to under... 100 \n", - "37 How many different races are there? 6 \n", - "42 Can a player pay coins to compensate for missi... YES \n", - "45 Which type of cards provide coins? -A: Gray -B... B \n", - "57 Is there a cleanup phase in the final round? NO \n", - "73 When are virtual addresses assigned to graph a... C \n", - "78 Can you enable lazy loading by setting the env... NO \n", - "92 What value was used for the weight decay? 0.1 \n", - "98 Which of the following is not considered a lim... C \n", - "\n", - " pred_answer pred_section \n", - "10 0. 1 NaN \n", - "26 YES NaN \n", - "28 FIVE NaN \n", - "29 FIVE NaN \n", - "33 THREE NaN \n", - "34 50 NaN \n", - "37 I NEED MORE INFO NaN \n", - "42 NO NaN \n", - "45 I NEED MORE INFO NaN \n", - "57 YES NaN \n", - "73 A NaN \n", - "78 I NEED MORE INFO NaN \n", - "92 0:1 NaN \n", - "98 I NEED MORE INFO NaN " - ], + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 14,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 26,\n \"min\": 10,\n \"max\": 98,\n \"num_unique_values\": 14,\n \"samples\": [\n 57,\n 78,\n 10\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\",\n \"https://arxiv.org/pdf/1706.03762\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Techincal 
Documentation\",\n \"Regulation\",\n \"Scientific Report\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 12,\n \"samples\": [\n \"2.3 Optimizer\",\n \"23.1. What is Lazy Loading?\",\n \"5.4 Regularization\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"Is there a cleanup phase in the final round?\",\n \"Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\",\n \"What was the dropout rate used for the base model?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"B\",\n \"NO\",\n \"6\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"A\",\n \"YES\",\n \"I NEED MORE INFO\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, "text/html": [ "\n", "
\n", @@ -2278,13 +840,91 @@ "
\n", "
\n" ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"results\",\n \"rows\": 14,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 26,\n \"min\": 10,\n \"max\": 98,\n \"num_unique_values\": 14,\n \"samples\": [\n 57,\n 78,\n 10\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\",\n \"https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\",\n \"https://arxiv.org/pdf/1706.03762\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Techincal Documentation\",\n \"Regulation\",\n \"Scientific Report\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 12,\n \"samples\": [\n \"2.3 Optimizer\",\n \"23.1. 
What is Lazy Loading?\",\n \"5.4 Regularization\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"Is there a cleanup phase in the final round?\",\n \"Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\",\n \"What was the dropout rate used for the base model?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"B\",\n \"NO\",\n \"6\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"A\",\n \"YES\",\n \"I NEED MORE INFO\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "26 26 https://authorsalliance.org/wp-content/uploads... \n", + "28 28 https://arxiv.org/pdf/2201.11903 \n", + "29 29 https://arxiv.org/pdf/2201.11903 \n", + "33 33 https://arxiv.org/pdf/2201.11903 \n", + "34 34 https://arxiv.org/pdf/2201.11903 \n", + "37 37 https://github.com/mozilla-ai/structured-qa/re... \n", + "42 42 https://github.com/mozilla-ai/structured-qa/re... \n", + "45 45 https://github.com/mozilla-ai/structured-qa/re... \n", + "57 57 https://github.com/mozilla-ai/structured-qa/re... \n", + "73 73 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... 
\n", + "92 92 https://arxiv.org/pdf/2302.13971 \n", + "98 98 https://assets.publishing.service.gov.uk/media... \n", + "\n", + " type \\\n", + "10 Scientific Paper \n", + "26 Techincal Documentation \n", + "28 Scientific Report \n", + "29 Scientific Report \n", + "33 Scientific Report \n", + "34 Scientific Report \n", + "37 Board Game \n", + "42 Board Game \n", + "45 Board Game \n", + "57 Board Game \n", + "73 Techincal Documentation \n", + "78 Techincal Documentation \n", + "92 Scientific Report \n", + "98 Regulation \n", + "\n", + " section \\\n", + "10 5.4 Regularization \n", + "26 CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK... \n", + "28 3.1 Experimental Setup \n", + "29 3.1 Experimental Setup \n", + "33 3.4 Robustness of Chain of Thought \n", + "34 3.2 Results \n", + "37 CARD AND TILE EFFECTS \n", + "42 CARD AND TILE COSTS \n", + "45 CARD AND TILE EFFECTS \n", + "57 CLEANUP PHASE \n", + "73 15.3. API Fundamentals \n", + "78 23.1. What is Lazy Loading? \n", + "92 2.3 Optimizer \n", + "98 Limitations of generative AI and LLMs \n", + "\n", + " question answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 \n", + "26 Are Gold Open Access and Green Open Access mut... NO \n", + "28 How many large language models were evaluated? 5 \n", + "29 How many benchmarks were used to evaluate arit... 5 \n", + "33 How many annotators provided independent chain... 3 \n", + "34 How many random samples were examined to under... 100 \n", + "37 How many different races are there? 6 \n", + "42 Can a player pay coins to compensate for missi... YES \n", + "45 Which type of cards provide coins? -A: Gray -B... B \n", + "57 Is there a cleanup phase in the final round? NO \n", + "73 When are virtual addresses assigned to graph a... C \n", + "78 Can you enable lazy loading by setting the env... NO \n", + "92 What value was used for the weight decay? 0.1 \n", + "98 Which of the following is not considered a lim... 
C \n", + "\n", + " pred_answer pred_section \n", + "10 0. 1 NaN \n", + "26 YES NaN \n", + "28 FIVE NaN \n", + "29 FIVE NaN \n", + "33 THREE NaN \n", + "34 50 NaN \n", + "37 I NEED MORE INFO NaN \n", + "42 NO NaN \n", + "45 I NEED MORE INFO NaN \n", + "57 YES NaN \n", + "73 A NaN \n", + "78 I NEED MORE INFO NaN \n", + "92 0:1 NaN \n", + "98 I NEED MORE INFO NaN " + ] }, + "execution_count": 18, "metadata": {}, - "execution_count": 18 + "output_type": "execute_result" } ], "source": [ @@ -2310,14 +950,14 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "0.8640776699029126" ] }, + "execution_count": 19, "metadata": {}, - "execution_count": 19 + "output_type": "execute_result" } ], "source": [ @@ -2351,32 +991,86 @@ }, "widgets": { "application/vnd.jupyter.widget-state+json": { - "fcbeac06bbec4921b0c45d61e1e89b88": { + "006194e2d15246c8ac0006f4ad0ec3d5": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_153cbeb65e6f401aac9648d8b11046ff", - "IPY_MODEL_f0972bb4d4634bab9cda10b2baa31a71", - "IPY_MODEL_0803dde3898641b2a8f79ab88eb0653f" - ], - "layout": "IPY_MODEL_e26c645517e54af1ade0e052394e9628" + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_52896569fd8c47a58f7f1e04dc2982ad", + "max": 438349816, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7d60dbc988c547168f59fded596f6326", + "value": 438349816 } }, - "153cbeb65e6f401aac9648d8b11046ff": { + "03078427cd344cd9bdc5e18da8a6dfa4": { + 
"model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "050caa495ff1470e9ccbb69e337edf41": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2388,40 +1082,53 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_f06f8db65bac463c98ecb34f2dcb0dfb", + "layout": "IPY_MODEL_1e141e9becd044fbab6394748b9838a2", "placeholder": "​", - "style": "IPY_MODEL_3415497210a047d78a24086b9f88f91c", - "value": "artifact.metadata: 100%" + "style": "IPY_MODEL_99a6d80264534dbdb40563b876fad6ba", + "value": "special_tokens_map.json: 100%" } }, - "f0972bb4d4634bab9cda10b2baa31a71": { + "0803dde3898641b2a8f79ab88eb0653f": { "model_module": 
"@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", + "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_ee7f2b57a99c4189b1429db4e6bce3fb", - "max": 1633, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_98ff4a5a268a458da6a7206ed3e437d7", - "value": 1633 + "layout": "IPY_MODEL_70a80cb1055d4786aa1f8a84ef3157ce", + "placeholder": "​", + "style": "IPY_MODEL_794d0b6347ab45b6ac7bf2d7eeb630e4", + "value": " 1.63k/1.63k [00:00<00:00, 48.3kB/s]" + } + }, + "0b4c805c2daf4e7dafbd6aedc458b4ef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "0803dde3898641b2a8f79ab88eb0653f": { + "11e03bd58cf04306aaaa1ddaf9b30c38": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2433,16 +1140,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_70a80cb1055d4786aa1f8a84ef3157ce", + "layout": "IPY_MODEL_2f8c4648e71d4297ab315f8f84814d16", "placeholder": "​", - "style": "IPY_MODEL_794d0b6347ab45b6ac7bf2d7eeb630e4", - "value": 
" 1.63k/1.63k [00:00<00:00, 48.3kB/s]" + "style": "IPY_MODEL_fd9a1d362a2c4aa4a9217b7913aab50b", + "value": "model.safetensors: 100%" } }, - "e26c645517e54af1ade0e052394e9628": { + "1309b4cac445449ca766b881004dd04c": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2491,62 +1198,31 @@ "width": null } }, - "f06f8db65bac463c98ecb34f2dcb0dfb": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "153cbeb65e6f401aac9648d8b11046ff": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": 
null + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f06f8db65bac463c98ecb34f2dcb0dfb", + "placeholder": "​", + "style": "IPY_MODEL_3415497210a047d78a24086b9f88f91c", + "value": "artifact.metadata: 100%" } }, - "3415497210a047d78a24086b9f88f91c": { + "1b59bbf7535945c2b290f4068e02214d": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2558,10 +1234,10 @@ "description_width": "" } }, - "ee7f2b57a99c4189b1429db4e6bce3fb": { + "1e141e9becd044fbab6394748b9838a2": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2610,10 +1286,49 @@ "width": null } }, - "98ff4a5a268a458da6a7206ed3e437d7": { + "228881dd188447f89f6d5a5e30ba2b39": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_858afce4603c4ac4ad6ae8a9817adfc6", + "max": 405, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fcf8e5b3abed4bd49df30e3235bbbc5d", + "value": 405 + } + }, + "2df45b5f52884ad689c0f7da99f9046d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2efd2ec934cb485ebb222ba39ef9b5c6": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2626,10 +1341,10 @@ "description_width": "" } }, - "70a80cb1055d4786aa1f8a84ef3157ce": { + "2f8c4648e71d4297ab315f8f84814d16": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2678,10 +1393,10 @@ "width": null } }, - "794d0b6347ab45b6ac7bf2d7eeb630e4": { + "3415497210a047d78a24086b9f88f91c": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2693,10 +1408,34 @@ "description_width": "" } }, - "6b38d9d92f09428a9d9670f918599e81": { + "3570f41c24a04d039f6e92f9c8e57cc3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_6fafe7befa5e46d2890e63327b846172", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_80d6d834e29140a7b452634cba9223e7", + "value": 231508 + } + }, + "42489b00de754d8ebc183a12f649eb8e": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2708,83 +1447,85 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_e14603a547e8473fa824cc7685b62456", - "IPY_MODEL_fab6bb0faa174a4aa02366b037660fa0", - "IPY_MODEL_b4832b95793c4d308d4d662c031f0b51" + "IPY_MODEL_f2404b6235374639bbf22cfdc7f2dd84", + "IPY_MODEL_86b48915fae748eeb3a572e793f8f8c7", + "IPY_MODEL_fc2dcb10a10244b7b19ca3163c0d5b17" ], - "layout": "IPY_MODEL_5bab6e216cdd4fd7a1d514d91056f275" + "layout": "IPY_MODEL_03078427cd344cd9bdc5e18da8a6dfa4" } }, - "e14603a547e8473fa824cc7685b62456": { + "429b6a5553c24b31acb8d7a19f947d63": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_95e5af6381ee424e92e17622294ce455", - "placeholder": "​", - "style": "IPY_MODEL_b38ffca43a2942c78216e663d3712dc2", - "value": "config.json: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "fab6bb0faa174a4aa02366b037660fa0": { + "4315a7426a7c4bf7a295ed915cb7af0e": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", 
"model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b232acd3b4f240e8b9815454dd78b64d", - "max": 743, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0b4c805c2daf4e7dafbd6aedc458b4ef", - "value": 743 + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "b4832b95793c4d308d4d662c031f0b51": { + "46246ea8cd134dcb952f2009bed30ff5": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9cb7af363f1a4372aec5826e2dc4d7e8", - "placeholder": "​", - "style": "IPY_MODEL_1b59bbf7535945c2b290f4068e02214d", - "value": " 743/743 [00:00<00:00, 40.5kB/s]" + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c55bbfdc199749a2b2c4e6514b96af71", + "IPY_MODEL_228881dd188447f89f6d5a5e30ba2b39", + "IPY_MODEL_e8fd851273e14870824433d33f71d521" + ], + "layout": "IPY_MODEL_69428342766b4ec28848d8cce5c88328" } }, - "5bab6e216cdd4fd7a1d514d91056f275": { + "481d046b1c2c427abea771e6a900be4b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": 
"1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5210b33e6f5547bd8689c50506d20949": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2833,10 +1574,10 @@ "width": null } }, - "95e5af6381ee424e92e17622294ce455": { + "52896569fd8c47a58f7f1e04dc2982ad": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2885,25 +1626,10 @@ "width": null } }, - "b38ffca43a2942c78216e663d3712dc2": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b232acd3b4f240e8b9815454dd78b64d": { + "5bab6e216cdd4fd7a1d514d91056f275": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2952,26 +1678,10 @@ "width": null } }, - "0b4c805c2daf4e7dafbd6aedc458b4ef": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - 
"_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "9cb7af363f1a4372aec5826e2dc4d7e8": { + "69428342766b4ec28848d8cce5c88328": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3020,25 +1730,10 @@ "width": null } }, - "1b59bbf7535945c2b290f4068e02214d": { + "6b38d9d92f09428a9d9670f918599e81": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c5031f4a757d43ec87fc4dd068773aa4": { - "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3050,62 +1745,17 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_11e03bd58cf04306aaaa1ddaf9b30c38", - "IPY_MODEL_006194e2d15246c8ac0006f4ad0ec3d5", - "IPY_MODEL_6c0db984c21f4b668af8895961c74a91" + "IPY_MODEL_e14603a547e8473fa824cc7685b62456", + "IPY_MODEL_fab6bb0faa174a4aa02366b037660fa0", + "IPY_MODEL_b4832b95793c4d308d4d662c031f0b51" ], - "layout": "IPY_MODEL_b02f570cc69f4da78aad558e9d1c3502" - } - }, - "11e03bd58cf04306aaaa1ddaf9b30c38": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": 
[], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2f8c4648e71d4297ab315f8f84814d16", - "placeholder": "​", - "style": "IPY_MODEL_fd9a1d362a2c4aa4a9217b7913aab50b", - "value": "model.safetensors: 100%" - } - }, - "006194e2d15246c8ac0006f4ad0ec3d5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_52896569fd8c47a58f7f1e04dc2982ad", - "max": 438349816, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_7d60dbc988c547168f59fded596f6326", - "value": 438349816 + "layout": "IPY_MODEL_5bab6e216cdd4fd7a1d514d91056f275" } }, "6c0db984c21f4b668af8895961c74a91": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3123,10 +1773,10 @@ "value": " 438M/438M [00:02<00:00, 236MB/s]" } }, - "b02f570cc69f4da78aad558e9d1c3502": { + "6e25ac32d79340ff9714bc02aaa5f95a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3175,10 +1825,10 @@ "width": null } }, - "2f8c4648e71d4297ab315f8f84814d16": { + 
"6fafe7befa5e46d2890e63327b846172": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3227,25 +1877,10 @@ "width": null } }, - "fd9a1d362a2c4aa4a9217b7913aab50b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "52896569fd8c47a58f7f1e04dc2982ad": { + "70a80cb1055d4786aa1f8a84ef3157ce": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3294,26 +1929,10 @@ "width": null } }, - "7d60dbc988c547168f59fded596f6326": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "91c6634d9446477caddf5bc0baf79b74": { + "7220498a1cb44256b294c639e016b80e": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3362,10 +1981,10 @@ "width": null } }, - "481d046b1c2c427abea771e6a900be4b": { + 
"794d0b6347ab45b6ac7bf2d7eeb630e4": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3377,77 +1996,42 @@ "description_width": "" } }, - "46246ea8cd134dcb952f2009bed30ff5": { + "7d60dbc988c547168f59fded596f6326": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c55bbfdc199749a2b2c4e6514b96af71", - "IPY_MODEL_228881dd188447f89f6d5a5e30ba2b39", - "IPY_MODEL_e8fd851273e14870824433d33f71d521" - ], - "layout": "IPY_MODEL_69428342766b4ec28848d8cce5c88328" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "c55bbfdc199749a2b2c4e6514b96af71": { + "80d6d834e29140a7b452634cba9223e7": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b280d7fd36e243f49a55ea5c90b3d96b", - "placeholder": "​", - "style": 
"IPY_MODEL_8d226412b4e749839f8ed0efc23f3456", - "value": "tokenizer_config.json: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "228881dd188447f89f6d5a5e30ba2b39": { + "856f08461e6d4d218306310f38dc00ed": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_858afce4603c4ac4ad6ae8a9817adfc6", - "max": 405, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_fcf8e5b3abed4bd49df30e3235bbbc5d", - "value": 405 - } - }, - "e8fd851273e14870824433d33f71d521": { - "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3459,16 +2043,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_b789dd69092449478667563eb96ae51b", + "layout": "IPY_MODEL_d25ed47d1fdf402db19dd3736e79a9cd", "placeholder": "​", - "style": "IPY_MODEL_2df45b5f52884ad689c0f7da99f9046d", - "value": " 405/405 [00:00<00:00, 19.6kB/s]" + "style": "IPY_MODEL_429b6a5553c24b31acb8d7a19f947d63", + "value": " 232k/232k [00:00<00:00, 8.54MB/s]" } }, - "69428342766b4ec28848d8cce5c88328": { + "858afce4603c4ac4ad6ae8a9817adfc6": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3517,10 
+2101,49 @@ "width": null } }, - "b280d7fd36e243f49a55ea5c90b3d96b": { + "86b48915fae748eeb3a572e793f8f8c7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_add5098a768f4b8292919aad0f7a95be", + "max": 466081, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4315a7426a7c4bf7a295ed915cb7af0e", + "value": 466081 + } + }, + "8d226412b4e749839f8ed0efc23f3456": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "91c6634d9446477caddf5bc0baf79b74": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3569,10 +2192,10 @@ "width": null } }, - "8d226412b4e749839f8ed0efc23f3456": { + "9459c2d1de6d420cb621a139141f429a": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3584,10 +2207,10 @@ "description_width": "" } }, - 
"858afce4603c4ac4ad6ae8a9817adfc6": { + "95e5af6381ee424e92e17622294ce455": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3636,26 +2259,10 @@ "width": null } }, - "fcf8e5b3abed4bd49df30e3235bbbc5d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "b789dd69092449478667563eb96ae51b": { + "9761af9a6ca14a03a6313fbc4edba9c7": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3704,113 +2311,41 @@ "width": null } }, - "2df45b5f52884ad689c0f7da99f9046d": { + "98ff4a5a268a458da6a7206ed3e437d7": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "d448a6c57d454df3a40509bebc92d337": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ff4a3e4282164dbcbd30ad0123ce9966", - "IPY_MODEL_3570f41c24a04d039f6e92f9c8e57cc3", - "IPY_MODEL_856f08461e6d4d218306310f38dc00ed" - ], - "layout": "IPY_MODEL_1309b4cac445449ca766b881004dd04c" - } - }, - "ff4a3e4282164dbcbd30ad0123ce9966": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9761af9a6ca14a03a6313fbc4edba9c7", - "placeholder": "​", - "style": "IPY_MODEL_de42f1287b0b42f59efe335d8d96cebe", - "value": "vocab.txt: 100%" - } - }, - "3570f41c24a04d039f6e92f9c8e57cc3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6fafe7befa5e46d2890e63327b846172", - "max": 231508, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_80d6d834e29140a7b452634cba9223e7", - "value": 231508 - } - }, - "856f08461e6d4d218306310f38dc00ed": { + "99a6d80264534dbdb40563b876fad6ba": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", 
"model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d25ed47d1fdf402db19dd3736e79a9cd", - "placeholder": "​", - "style": "IPY_MODEL_429b6a5553c24b31acb8d7a19f947d63", - "value": " 232k/232k [00:00<00:00, 8.54MB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "1309b4cac445449ca766b881004dd04c": { + "9b1e49d8f8444ced936dee33f632dd83": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3859,10 +2394,25 @@ "width": null } }, - "9761af9a6ca14a03a6313fbc4edba9c7": { + "9c2f76016b7b4e819e3cac5bb8afa2b3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9cb7af363f1a4372aec5826e2dc4d7e8": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3911,25 +2461,56 @@ "width": null } }, - "de42f1287b0b42f59efe335d8d96cebe": { + 
"a000ee3a60924a73ae9a07605848924c": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5210b33e6f5547bd8689c50506d20949", + "max": 112, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2efd2ec934cb485ebb222ba39ef9b5c6", + "value": 112 } }, - "6fafe7befa5e46d2890e63327b846172": { + "a4f77aec2ec947508a6430f99df88cb4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_050caa495ff1470e9ccbb69e337edf41", + "IPY_MODEL_a000ee3a60924a73ae9a07605848924c", + "IPY_MODEL_b1392a03cdb045c2833aaf5370ba9879" + ], + "layout": "IPY_MODEL_ef6d766fc5304b34b14ce2722570b785" + } + }, + "add5098a768f4b8292919aad0f7a95be": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3965,39 +2546,23 @@ "max_width": null, "min_height": null, "min_width": null, - 
"object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "80d6d834e29140a7b452634cba9223e7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "d25ed47d1fdf402db19dd3736e79a9cd": { + "b02f570cc69f4da78aad558e9d1c3502": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4046,92 +2611,10 @@ "width": null } }, - "429b6a5553c24b31acb8d7a19f947d63": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "42489b00de754d8ebc183a12f649eb8e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": 
"1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_f2404b6235374639bbf22cfdc7f2dd84", - "IPY_MODEL_86b48915fae748eeb3a572e793f8f8c7", - "IPY_MODEL_fc2dcb10a10244b7b19ca3163c0d5b17" - ], - "layout": "IPY_MODEL_03078427cd344cd9bdc5e18da8a6dfa4" - } - }, - "f2404b6235374639bbf22cfdc7f2dd84": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7220498a1cb44256b294c639e016b80e", - "placeholder": "​", - "style": "IPY_MODEL_ea5a5b11334243048936cd96d7189348", - "value": "tokenizer.json: 100%" - } - }, - "86b48915fae748eeb3a572e793f8f8c7": { + "b1392a03cdb045c2833aaf5370ba9879": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_add5098a768f4b8292919aad0f7a95be", - "max": 466081, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_4315a7426a7c4bf7a295ed915cb7af0e", - "value": 466081 - } - }, - "fc2dcb10a10244b7b19ca3163c0d5b17": { - "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", - "model_module_version": 
"1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4143,16 +2626,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_9b1e49d8f8444ced936dee33f632dd83", + "layout": "IPY_MODEL_6e25ac32d79340ff9714bc02aaa5f95a", "placeholder": "​", - "style": "IPY_MODEL_9c2f76016b7b4e819e3cac5bb8afa2b3", - "value": " 466k/466k [00:00<00:00, 3.24MB/s]" + "style": "IPY_MODEL_9459c2d1de6d420cb621a139141f429a", + "value": " 112/112 [00:00<00:00, 6.91kB/s]" } }, - "03078427cd344cd9bdc5e18da8a6dfa4": { + "b232acd3b4f240e8b9815454dd78b64d": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4201,10 +2684,10 @@ "width": null } }, - "7220498a1cb44256b294c639e016b80e": { + "b280d7fd36e243f49a55ea5c90b3d96b": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4253,10 +2736,10 @@ "width": null } }, - "ea5a5b11334243048936cd96d7189348": { + "b38ffca43a2942c78216e663d3712dc2": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -4268,10 +2751,31 @@ "description_width": "" } }, - "add5098a768f4b8292919aad0f7a95be": { + "b4832b95793c4d308d4d662c031f0b51": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9cb7af363f1a4372aec5826e2dc4d7e8", + "placeholder": "​", + "style": "IPY_MODEL_1b59bbf7535945c2b290f4068e02214d", + "value": " 743/743 [00:00<00:00, 40.5kB/s]" + } + }, + "b789dd69092449478667563eb96ae51b": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4320,26 +2824,53 @@ "width": null } }, - "4315a7426a7c4bf7a295ed915cb7af0e": { + "c5031f4a757d43ec87fc4dd068773aa4": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HBoxModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_11e03bd58cf04306aaaa1ddaf9b30c38", + "IPY_MODEL_006194e2d15246c8ac0006f4ad0ec3d5", + "IPY_MODEL_6c0db984c21f4b668af8895961c74a91" + ], + "layout": "IPY_MODEL_b02f570cc69f4da78aad558e9d1c3502" } }, - "9b1e49d8f8444ced936dee33f632dd83": { + "c55bbfdc199749a2b2c4e6514b96af71": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b280d7fd36e243f49a55ea5c90b3d96b", + "placeholder": "​", + "style": "IPY_MODEL_8d226412b4e749839f8ed0efc23f3456", + "value": "tokenizer_config.json: 100%" + } + }, + "d25ed47d1fdf402db19dd3736e79a9cd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4388,25 +2919,10 @@ "width": null } }, - "9c2f76016b7b4e819e3cac5bb8afa2b3": { + "d448a6c57d454df3a40509bebc92d337": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a4f77aec2ec947508a6430f99df88cb4": { - "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4418,62 +2934,32 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_050caa495ff1470e9ccbb69e337edf41", - "IPY_MODEL_a000ee3a60924a73ae9a07605848924c", - "IPY_MODEL_b1392a03cdb045c2833aaf5370ba9879" + "IPY_MODEL_ff4a3e4282164dbcbd30ad0123ce9966", + "IPY_MODEL_3570f41c24a04d039f6e92f9c8e57cc3", + "IPY_MODEL_856f08461e6d4d218306310f38dc00ed" ], - "layout": "IPY_MODEL_ef6d766fc5304b34b14ce2722570b785" - } - }, - "050caa495ff1470e9ccbb69e337edf41": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1e141e9becd044fbab6394748b9838a2", - "placeholder": "​", - "style": "IPY_MODEL_99a6d80264534dbdb40563b876fad6ba", - "value": "special_tokens_map.json: 100%" + "layout": "IPY_MODEL_1309b4cac445449ca766b881004dd04c" } }, - "a000ee3a60924a73ae9a07605848924c": { + "de42f1287b0b42f59efe335d8d96cebe": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5210b33e6f5547bd8689c50506d20949", - "max": 112, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2efd2ec934cb485ebb222ba39ef9b5c6", - "value": 112 + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "b1392a03cdb045c2833aaf5370ba9879": { + "e14603a547e8473fa824cc7685b62456": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4485,16 +2971,16 @@ "_view_name": "HTMLView", "description": "", 
"description_tooltip": null, - "layout": "IPY_MODEL_6e25ac32d79340ff9714bc02aaa5f95a", + "layout": "IPY_MODEL_95e5af6381ee424e92e17622294ce455", "placeholder": "​", - "style": "IPY_MODEL_9459c2d1de6d420cb621a139141f429a", - "value": " 112/112 [00:00<00:00, 6.91kB/s]" + "style": "IPY_MODEL_b38ffca43a2942c78216e663d3712dc2", + "value": "config.json: 100%" } }, - "ef6d766fc5304b34b14ce2722570b785": { + "e26c645517e54af1ade0e052394e9628": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4543,10 +3029,46 @@ "width": null } }, - "1e141e9becd044fbab6394748b9838a2": { + "e8fd851273e14870824433d33f71d521": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b789dd69092449478667563eb96ae51b", + "placeholder": "​", + "style": "IPY_MODEL_2df45b5f52884ad689c0f7da99f9046d", + "value": " 405/405 [00:00<00:00, 19.6kB/s]" + } + }, + "ea5a5b11334243048936cd96d7189348": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ee7f2b57a99c4189b1429db4e6bce3fb": { "model_module": "@jupyter-widgets/base", - "model_name": 
"LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4595,25 +3117,10 @@ "width": null } }, - "99a6d80264534dbdb40563b876fad6ba": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5210b33e6f5547bd8689c50506d20949": { + "ef6d766fc5304b34b14ce2722570b785": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4662,26 +3169,10 @@ "width": null } }, - "2efd2ec934cb485ebb222ba39ef9b5c6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6e25ac32d79340ff9714bc02aaa5f95a": { + "f06f8db65bac463c98ecb34f2dcb0dfb": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4730,10 +3221,138 @@ "width": null } }, - "9459c2d1de6d420cb621a139141f429a": { + "f0972bb4d4634bab9cda10b2baa31a71": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ee7f2b57a99c4189b1429db4e6bce3fb", + "max": 1633, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_98ff4a5a268a458da6a7206ed3e437d7", + "value": 1633 + } + }, + "f2404b6235374639bbf22cfdc7f2dd84": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7220498a1cb44256b294c639e016b80e", + "placeholder": "​", + "style": "IPY_MODEL_ea5a5b11334243048936cd96d7189348", + "value": "tokenizer.json: 100%" + } + }, + "fab6bb0faa174a4aa02366b037660fa0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b232acd3b4f240e8b9815454dd78b64d", + "max": 743, + "min": 0, + "orientation": "horizontal", + "style": 
"IPY_MODEL_0b4c805c2daf4e7dafbd6aedc458b4ef", + "value": 743 + } + }, + "fc2dcb10a10244b7b19ca3163c0d5b17": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9b1e49d8f8444ced936dee33f632dd83", + "placeholder": "​", + "style": "IPY_MODEL_9c2f76016b7b4e819e3cac5bb8afa2b3", + "value": " 466k/466k [00:00<00:00, 3.24MB/s]" + } + }, + "fcbeac06bbec4921b0c45d61e1e89b88": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_153cbeb65e6f401aac9648d8b11046ff", + "IPY_MODEL_f0972bb4d4634bab9cda10b2baa31a71", + "IPY_MODEL_0803dde3898641b2a8f79ab88eb0653f" + ], + "layout": "IPY_MODEL_e26c645517e54af1ade0e052394e9628" + } + }, + "fcf8e5b3abed4bd49df30e3235bbbc5d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "fd9a1d362a2c4aa4a9217b7913aab50b": { "model_module": "@jupyter-widgets/controls", - 
"model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -4744,10 +3363,31 @@ "_view_name": "StyleView", "description_width": "" } + }, + "ff4a3e4282164dbcbd30ad0123ce9966": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9761af9a6ca14a03a6313fbc4edba9c7", + "placeholder": "​", + "style": "IPY_MODEL_de42f1287b0b42f59efe335d8d96cebe", + "value": "vocab.txt: 100%" + } } } } }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/benchmark/gemini_find_retrieve_answer.ipynb b/benchmark/gemini_find_retrieve_answer.ipynb index e66bfdd..e940e99 100644 --- a/benchmark/gemini_find_retrieve_answer.ipynb +++ b/benchmark/gemini_find_retrieve_answer.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -46,120 +46,14 @@ "id": "2HoyF-xbHEgv", "outputId": "70e679cc-b6be-4c19-ca1f-1148c8f5e27a" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-q1o0cypa\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-q1o0cypa\n", - " Running command git checkout -b 5-add-benchmark --track 
origin/5-add-benchmark\n", - " Switched to a new branch '5-add-benchmark'\n", - " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit a02ffd7c45a36261597af3f00a2316d7e349d05b\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting fire (from structured-qa==0.3.3.dev109+ga02ffd7)\n", - " Downloading fire-0.7.0.tar.gz (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev109+ga02ffd7) (0.27.1)\n", - "Collecting loguru (from structured-qa==0.3.3.dev109+ga02ffd7)\n", - " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev109+ga02ffd7) (2.10.6)\n", - "Collecting pymupdf4llm (from structured-qa==0.3.3.dev109+ga02ffd7)\n", - " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev109+ga02ffd7) (6.0.2)\n", - "Collecting rapidfuzz (from structured-qa==0.3.3.dev109+ga02ffd7)\n", - " Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", - "Collecting streamlit (from structured-qa==0.3.3.dev109+ga02ffd7)\n", - " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", - "Requirement already satisfied: termcolor in 
/usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev109+ga02ffd7) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev109+ga02ffd7) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev109+ga02ffd7) (2.27.2)\n", - "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev109+ga02ffd7)\n", - " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (1.9.0)\n", - "Requirement already satisfied: 
cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (0.10.2)\n", - "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7)\n", - " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 
in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (3.1.44)\n", - "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7)\n", - " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (6.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (1.24.1)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from 
requests->huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev109+ga02ffd7) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (5.0.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (25.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) 
(0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev109+ga02ffd7) (1.17.0)\n", - "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", - "Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m57.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m101.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m92.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m70.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - 
"\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", - " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev109+ga02ffd7-py3-none-any.whl size=13245 sha256=2f62af9114406a8689c20b7543363be36c644f43c872847130f9e4881dd31586\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-p89axgg2/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", - " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=b391db5f33e78d5df3c80cbcc1aa314fa35363c7b79f9301a83167bd7b2d5437\n", - " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", - "Successfully built structured-qa fire\n", - "Installing collected packages: watchdog, rapidfuzz, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", - "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 rapidfuzz-3.12.1 streamlit-1.41.1 structured-qa-0.3.3.dev109+ga02ffd7 watchdog-6.0.0\n" - ] - } - ], + "outputs": [], "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + "%pip install --quiet structured-qa" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -167,27 +61,9 @@ "id": "p_hsSGafHEgw", "outputId": "0fc4f4af-f5b5-46bd-8a5c-ad2fb956c5d0" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2025-02-04 10:16:08-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 21441 (21K) [text/plain]\n", - "Saving to: ‘structured_qa.csv’\n", - "\n", - "structured_qa.csv 100%[===================>] 20.94K --.-KB/s in 0.001s \n", - "\n", - "2025-02-04 10:16:08 (13.8 MB/s) - ‘structured_qa.csv’ saved [21441/21441]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/main/benchmark/structured_qa.csv" ] }, { @@ -289,11 +165,7 @@ "\"\"\"\n", "\n", "\n", - "def process_document(\n", - " document_file,\n", - " document_data,\n", - " model\n", - "):\n", + "def process_document(document_file, document_data, model):\n", " sections_dir = Path(\"sections\") / Path(document_file).stem\n", " if not sections_dir.exists():\n", " logger.info(\"Splitting document into sections\")\n", @@ -357,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -366,1622 +238,7 @@ "id": "-qtPf9RmHEgz", "outputId": "e03e8757-952c-45b7-9c19-b2e57cc46a2a" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 10:16:14.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-02-04 10:16:14.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 10:16:14.616\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf to HAI_AI-Index-Report-2024.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 10:16:14.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:16:14.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting HAI_AI-Index-Report-2024.pdf.pdf\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing HAI_AI-Index-Report-2024.pdf.pdf...\n", - "[ ] (0/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 2/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 3/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 4/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 
5/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 6/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 7/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 8/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 9/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 10/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 11/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 12/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 13/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 
14/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 15/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 16/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 17/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 18/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 19/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 20/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 21/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 22/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 
23/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 24/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 25/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 26/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 27/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 28/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 29/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 30/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 31/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 
32/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 33/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 34/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 35/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 36/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 37/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 38/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 39/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 40/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 
41/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 42/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 43/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 44/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 45/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 46/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 47/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 48/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 49/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 
50/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 51/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 52/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 53/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 54/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 55/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 56/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 57/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 58/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 
59/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 60/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 61/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 62/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 63/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 64/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 65/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 66/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 67/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 
68/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 69/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 70/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 71/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 72/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 73/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 74/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 75/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 76/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 
77/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 78/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 79/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 80/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 81/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 82/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 83/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 84/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 85/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 
86/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 87/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 88/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 89/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 90/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 91/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 92/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 93/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 94/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 
95/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 96/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 97/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 98/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 99/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (100/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (101/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (102/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (103/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] 
(104/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (105/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (106/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (107/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (108/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (109/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (110/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (111/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (112/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] 
(113/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (114/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (115/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (116/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (117/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (118/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (119/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (120/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (121/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] 
(122/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (123/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (124/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (125/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (126/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (127/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (128/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (129/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (130/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] 
(131/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (132/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (133/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (134/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (135/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (136/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (137/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (138/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (139/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] 
(140/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (141/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (142/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (143/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (144/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (145/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (146/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (147/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (148/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] 
(149/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (150/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (151/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (152/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (153/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (154/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (155/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (156/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (157/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] 
(158/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (159/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (160/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (161/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (162/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (163/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (164/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (165/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (166/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] 
(167/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (168/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (169/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (170/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (171/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (172/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (173/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (174/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (175/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] 
(176/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (177/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (178/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (179/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (180/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (181/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (182/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (183/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (184/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] 
(185/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (186/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (187/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (188/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (189/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (190/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (191/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (192/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (193/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] 
(194/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (195/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (196/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (197/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (198/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (199/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (200/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (201/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (202/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] 
(203/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (204/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (205/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (206/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (207/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (208/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (209/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (210/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (211/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] 
(212/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (213/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (214/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (215/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (216/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (217/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (218/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (219/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (220/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] 
(221/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (222/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (223/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (224/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (225/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (226/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (227/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (228/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (229/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (230/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] 
(231/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (232/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (233/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (234/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (235/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (236/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (237/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (238/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (239/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (240/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] 
(241/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (242/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (243/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (244/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (245/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (246/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (247/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (248/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (249/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (250/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] 
(251/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (252/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (253/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (254/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (255/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (256/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (257/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (258/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (259/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (260/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] 
(261/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (262/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (263/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (264/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (265/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (266/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (267/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (268/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (269/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (270/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] 
(271/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (272/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (273/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (274/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (275/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (276/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (277/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (278/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (279/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (280/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] 
(281/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (282/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (283/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (284/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (285/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (286/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (287/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (288/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (289/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (290/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] 
(291/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (292/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (293/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (294/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (295/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (296/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (297/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (298/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (299/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (300/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] 
(301/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (302/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (303/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (304/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (305/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (306/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (307/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (308/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (309/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (310/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] 
(311/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (312/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (313/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (314/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (315/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (316/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (317/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (318/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (319/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (320/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] 
(321/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (322/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (323/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (324/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (325/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (326/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (327/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (328/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (329/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (330/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] 
(331/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (332/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (333/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (334/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (335/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (336/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (337/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (338/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (339/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (340/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] 
(341/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (342/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (343/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (344/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (345/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (346/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (347/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (348/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (349/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (350/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] 
(351/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (352/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (353/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (354/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (355/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (356/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (357/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (358/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (359/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (360/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] 
(361/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (362/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (363/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (364/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (365/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (366/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (367/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (368/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (369/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (370/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] 
(371/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (372/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (373/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (374/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (375/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (376/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (377/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (378/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (379/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (380/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] 
(381/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (382/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (383/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (384/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (385/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (386/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (387/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (388/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (389/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (390/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] 
(391/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (392/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (393/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (394/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (395/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (396/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (397/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (398/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (399/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (400/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] 
(401/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (402/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (403/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (404/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (405/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (406/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (407/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (408/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (409/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (410/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] 
(411/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (412/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (413/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (414/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (415/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (416/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (417/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (418/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (419/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (420/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] 
(421/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (422/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (423/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (424/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (425/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (426/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (427/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (428/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (429/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (430/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] 
(431/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (432/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (433/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (434/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (435/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (436/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (437/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (438/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (439/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (440/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] 
(441/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (442/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (443/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (444/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (445/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (446/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (447/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (448/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (449/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (450/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] 
(451/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (452/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (453/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (454/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (455/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (456/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (457/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (458/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (459/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (460/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (461/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] 
(462/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (463/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (464/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (465/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (466/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (467/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (468/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (469/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (470/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (471/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (472/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] 
(473/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (474/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (475/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (476/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (477/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (478/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (479/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (480/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (481/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (482/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (483/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] 
(484/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (485/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (486/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (487/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (488/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (489/502)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (490/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (491/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (492/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (493/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (494/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] 
(495/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (496/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (497/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (498/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (499/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (500/502)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (501/502)\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (502/502)\b\b\b\b\b\b\b\b\b\b\b" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 10:21:38.611\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:38.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:38.763\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | 
\u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 84 sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:38.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:38.780\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:38.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:38.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. 
-C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:38.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 0\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 10:21:51.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:52.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:53.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:55.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:55.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. 
-C: North America.\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:55.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:56.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:57.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:57.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. 
-C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:57.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:58.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:21:59.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:22:00.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:22:00.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:01.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:02.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:04.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:05.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - 
\u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:06.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:06.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:06.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:07.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:08.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:09.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:10.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:10.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI 
related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:10.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:23:10.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:18.584\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:20.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:20.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:20.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:21.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:22.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:23.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:25.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:25.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:25.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:26.466\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:27.766\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 25\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:27.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. -C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:27.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:28.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:24:28.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:25:30.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 10:25:30.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. 
-C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-02-04 10:25:30.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:25:31.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:25:32.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:25:34.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:25:35.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:25:36.180\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:25:37.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:25:38.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:25:40.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 
10:25:40.308\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:26:41.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:26:42.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:26:45.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:26:46.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:26:47.736\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:26:49.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:26:50.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:26:51.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:26:52.223\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:26:52.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:27:53.955\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:27:54.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:27:56.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:27:57.379\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:27:58.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:27:59.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:28:00.796\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:28:01.943\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:28:03.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:28:03.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:04.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:05.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:07.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:08.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:09.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:11.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:11.045\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:11.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:12.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:13.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:14.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:29:14.339\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:15.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:17.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:18.263\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:19.308\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:20.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:21.475\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:22.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:24.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:25.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:25.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:25.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:25.316\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:25.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 1706.03762.pdf\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing 1706.03762.pdf...\n", - "[ ] (0/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 1/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 2/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 3/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 4/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 5/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 6/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 
7/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 8/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 9/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (10/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (11/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (12/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (13/15)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (14/15)\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (15/15)\b\b\b\b\b\b\b\b\b" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 10:30:30.455\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:30.457\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:30.466\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 12 sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:30.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/1706.03762\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:30.474\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:30.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:30.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:30.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:30:30.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 10:31:31.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:33.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:33.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:33.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:35.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:36.412\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:36.413\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:36.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:37.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:39.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:39.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:39.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:40.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:42.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:42.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-02-04 
10:31:42.053\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:43.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:31:43.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:45.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:45.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:45.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:47.415\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:48.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:48.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:48.995\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:50.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:52.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 8\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:52.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:52.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:53.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:54.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:54.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:54.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:56.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:57.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:57.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:57.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:32:57.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:33:58.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:00.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 4000\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:00.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base 
model?\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:00.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:01.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:02.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:04.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:05.498\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:07.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:08.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 0.1\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:08.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:08.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", - 
"\u001b[32m2025-02-04 10:34:08.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:34:08.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 2106.09685.pdf.pdf\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing 2106.09685.pdf.pdf...\n", - "[ ] (0/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 1/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 2/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 3/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 4/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 5/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 
6/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 7/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 8/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 9/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (10/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (11/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (12/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (13/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (14/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (15/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] 
(16/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (17/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (18/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (19/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (20/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (21/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (22/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (23/26)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (24/26)\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (25/26)\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (26/26)\b\b\b\b\b\b\b\b\b" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 10:35:27.059\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | 
\u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 10:35:27.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:35:27.073\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 22 sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:35:27.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 10:35:27.080\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 10:35:27.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 10:35:27.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-02-04 10:35:27.087\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 
10:35:28.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:35:28.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:29.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:30.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:31.730\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:31.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:31.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:33.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:34.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:35.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:36.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:36.801\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:36.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:37.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:39.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 
9\u001b[0m\n", - "\u001b[32m2025-02-04 10:36:39.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:40.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:41.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:42.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:44.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:44.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:44.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:45.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:46.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 
6\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:47.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:48.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:49.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:37:49.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:38:50.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:38:51.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:38:52.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:38:53.828\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:38:54.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:38:54.830\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-02-04 10:38:55.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 10:38:55.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:38:55.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 2201.11903.pdf\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing 2201.11903.pdf...\n", - "[ ] (0/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 2/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 3/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 
4/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 5/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 6/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 7/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 8/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 9/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (10/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (11/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (12/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] 
(13/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (14/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (15/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (16/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (17/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (18/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (19/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (20/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (21/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (22/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] 
(23/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (24/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (25/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (26/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (27/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (28/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (29/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (30/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (31/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (32/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] 
(33/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (34/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (35/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (36/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (37/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (38/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (39/43)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (40/43)\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (41/43)\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (42/43)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (43/43)\b\b\b\b\b\b\b\b\b" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 10:39:55.481\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | 
\u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 10:39:55.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:39:55.500\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 21 sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:39:55.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/2201.11903\u001b[0m\n", - "\u001b[32m2025-02-04 10:39:55.510\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 10:39:55.512\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 10:39:55.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-02-04 10:39:55.518\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 
10:39:56.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:39:57.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:39:57.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-02-04 10:39:57.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:39:58.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:40:00.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:40:00.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:01.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:02.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:03.397\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:04.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:05.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:06.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:07.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:09.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:09.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:09.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:10.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:41:10.105\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:11.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 5\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:11.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:11.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:13.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:14.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:14.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? 
-A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:14.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:15.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:17.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: Based on the provided information (Table 13), the model has seen two examples for the letter concatenation task: \"Waldo Schmidt\" and \"Daniel Friedman\". Thus, the model has seen 2 example names.\n", - "So the answer is B.\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:17.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:17.010\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:17.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:19.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:19.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:19.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:20.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:21.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 2\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:21.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:21.530\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:42:21.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:43:22.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:43:24.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:43:25.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:43:26.087\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:43:27.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:43:28.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:43:29.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:43:30.470\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:43:31.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:43:31.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:44:32.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:44:33.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:44:34.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:44:35.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:44:36.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:44:38.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:44:39.388\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:44:40.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:44:41.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:44:41.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:45:42.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:45:43.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:45:44.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:45:45.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:45:46.507\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:45:47.833\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:45:49.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:45:50.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:45:51.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:45:51.704\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:46:52.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:46:54.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:46:55.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:46:56.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:46:57.640\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:46:58.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:47:00.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:47:01.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:47:02.553\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:47:02.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:03.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:04.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:05.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:05.901\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:06.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:06.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:06.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 2210.05189.pdf\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing 2210.05189.pdf...\n", - "[ ] (0/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] (1/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (2/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (3/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (4/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] 
(5/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (6/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (7/8)\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (8/8)\b\b\b\b\b\b\b" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 10:48:08.615\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:08.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:08.625\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 12 sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:08.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/2210.05189\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:08.631\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:08.636\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:08.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:08.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 10:48:09.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:10.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:10.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:10.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:11.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:12.957\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:13.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:14.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:48:14.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:49:16.169\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:49:17.189\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:49:18.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:49:19.204\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:49:20.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:49:21.118\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:49:22.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:49:23.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:49:23.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:49:23.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:50:24.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:50:26.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:50:26.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:50:27.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:50:28.872\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:50:29.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:50:30.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:50:31.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:50:32.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:50:32.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:33.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:33.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:33.725\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:34.773\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:35.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:36.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:37.811\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:38.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:39.801\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:40.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:41.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:51:41.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:52:43.016\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:52:43.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:52:44.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:52:45.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:52:46.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:52:48.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:52:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:52:49.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:52:51.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:52:51.003\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:53:52.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:53:53.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:53:54.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:53:55.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:53:56.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:53:57.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", - "\u001b[32m2025-02-04 10:53:57.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-02-04 10:53:57.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:53:58.400\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:53:59.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:54:00.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:54:00.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:01.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:02.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:03.880\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 14\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:03.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:03.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:04.931\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:05.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:06.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:07.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:08.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:10.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:10.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:10.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:55:10.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:11.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:12.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:13.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:14.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:15.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:16.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:16.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document 
https://arxiv.org/pdf/2302.13971\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:16.363\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:16.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:16.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 2302.13971.pdf\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing 2302.13971.pdf...\n", - "[ ] (0/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 1/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 2/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 3/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 4/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 
5/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 6/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 7/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 8/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 9/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (10/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (11/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (12/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (13/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (14/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] 
(15/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (16/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (17/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (18/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (19/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (20/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (21/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (22/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (23/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (24/27)\b\b\b\b\b\b\b\b\b\b\b\b\b\b==\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (25/27)\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] 
(26/27)\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (27/27)" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 10:56:30.134\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:30.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:30.151\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 17 sections\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:30.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/2302.13971\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:30.160\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:30.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:30.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\b\b\b\b\b\b\b\b\b]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 10:56:30.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:31.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:32.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:33.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:56:33.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:57:34.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:57:35.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:57:36.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:57:37.262\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:57:38.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:57:39.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:57:40.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:57:41.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:57:42.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:57:42.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:58:43.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:58:44.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:58:45.223\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:58:46.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:58:47.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:58:48.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:58:49.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 10:58:50.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 10:58:51.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 10:58:51.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 10:59:52.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 10:59:52.819\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", - "\u001b[32m2025-02-04 10:59:52.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 10:59:53.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 10:59:54.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 10:59:55.860\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 10:59:56.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 10:59:57.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 10:59:59.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:00:00.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:00:01.111\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:00:01.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:01:02.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:01:03.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:01:04.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:01:05.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:01:06.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:01:07.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:01:08.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:01:09.271\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:01:10.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:01:10.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:11.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:12.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:13.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:14.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:15.725\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 20\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:15.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:15.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:16.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:17.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:17.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:17.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:18.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:20.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 0.1\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:20.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", - "\u001b[32m2025-02-04 11:02:20.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 
11:02:20.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:21.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:22.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 20\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:22.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:22.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:23.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:24.618\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:25.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:26.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:27.654\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:28.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:29.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:03:29.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:31.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:32.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:33.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:34.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:35.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:36.242\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:37.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:38.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:39.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:39.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:39.204\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:04:39.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:40.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:41.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:41.274\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:41.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:42.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:43.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:43.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:43.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:44.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:45.618\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:45.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document 
https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:45.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:45.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:05:45.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf...\n", - "[ ] (0/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 2/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 3/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 
4/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 5/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 6/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 7/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 8/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 9/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] (10/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] (11/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] (12/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] 
(13/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (14/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (15/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (16/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (17/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (18/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (19/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (20/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (21/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (22/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] 
(23/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (24/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (25/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (26/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (27/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (28/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (29/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (30/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (31/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (32/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] 
(33/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (34/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (35/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (36/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (37/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (38/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (39/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (40/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (41/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (42/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] 
(43/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (44/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (45/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (46/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (47/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (48/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (49/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (50/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (51/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (52/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] 
(53/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (54/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (55/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (56/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (57/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (58/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (59/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (60/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (61/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (62/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (63/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] 
(64/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (65/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (66/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (67/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (68/74)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (69/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (70/74)\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (71/74)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (72/74)\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (73/74)\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (74/74)" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 11:06:05.243\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - 
\u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 11:06:05.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:06:05.272\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 26 sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:06:05.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:06:05.281\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 11:06:05.283\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 11:06:05.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? 
-A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", - "\u001b[32m2025-02-04 11:06:05.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\b\b\b\b\b\b\b\b\b]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 11:06:06.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:06:07.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:06:08.351\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:06:08.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:09.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:09.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:09.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - 
"\u001b[32m2025-02-04 11:07:11.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:12.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:12.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? -A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:12.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:13.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:15.254\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:16.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:17.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:17.700\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:17.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:18.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:20.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:20.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:20.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:07:20.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:21.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:22.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:23.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:25.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:25.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:25.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to 
Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:25.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:25.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf...\n", - "[ ] (0/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 2/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 3/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 4/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 
5/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 6/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 7/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 8/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 9/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 10/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 11/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 12/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 13/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 
14/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 15/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 16/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 17/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 18/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 19/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 20/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 21/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 22/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 
23/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 24/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 25/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 26/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 27/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 28/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 29/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 30/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 31/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 
32/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 33/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 34/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 35/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 36/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 37/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 38/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 39/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 40/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 
41/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 42/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 43/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 44/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 45/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 46/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 47/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 48/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 49/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 
50/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 51/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 52/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 53/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 54/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 55/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 56/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 57/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 58/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 
59/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 60/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 61/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 62/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 63/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 64/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 65/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 66/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 67/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 68/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 
69/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 70/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 71/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 72/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 73/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 74/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 75/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 76/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 77/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 78/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 
79/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 80/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 81/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 82/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 83/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 84/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 85/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 86/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 87/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 88/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 
89/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 90/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 91/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 92/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 93/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 94/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 95/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 96/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 97/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] ( 98/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] ( 
99/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (100/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (101/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (102/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (103/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (104/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (105/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (106/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (107/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (108/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] 
(109/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (110/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (111/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (112/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (113/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (114/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (115/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (116/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (117/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (118/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] 
(119/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (120/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (121/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (122/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (123/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (124/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (125/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (126/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (127/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (128/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] 
(129/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (130/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (131/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (132/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (133/140)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (134/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (135/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (136/140)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (137/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (138/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (139/140)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] 
(140/140)\b\b\b\b\b\b\b\b\b\b\b" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 11:08:38.611\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:38.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 11:08:38.650\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 40 sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:38.654\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:38.666\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:38.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:38.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open 
access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:38.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:40.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:41.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:42.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:44.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:44.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? 
-A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:44.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:45.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:08:45.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:47.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:48.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:49.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:49.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:49.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:50.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent 
calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:51.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:51.726\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:51.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:52.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:53.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:53.872\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:53.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:54.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:55.888\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:09:55.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:10:57.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:10:58.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 2015\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:10:58.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-02-04 11:10:58.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:10:59.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:00.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:00.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document 
https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:01.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:01.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:01.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf...\n", - "[ ] (0/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 
2/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 3/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 4/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 5/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 6/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 7/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 8/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 9/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 10/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 
11/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 12/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 13/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 14/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 15/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 16/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 17/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 18/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 19/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 
20/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 21/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 22/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 23/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 24/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] ( 25/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 26/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 27/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 28/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] ( 
29/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 30/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 31/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] ( 32/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 33/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 34/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 35/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 36/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 37/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] ( 
38/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 39/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 40/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] ( 41/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 42/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 43/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 44/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 45/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 46/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 
47/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] ( 48/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 49/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 50/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] ( 51/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 52/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 53/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 54/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 55/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 
56/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 57/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] ( 58/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 59/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 60/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] ( 61/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 62/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 63/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] ( 64/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 65/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 
66/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 67/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 68/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 69/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] ( 70/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 71/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 72/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 73/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] ( 74/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 75/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 
76/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 77/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 78/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 79/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] ( 80/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 81/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 82/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] ( 83/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 84/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 85/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 
86/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 87/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 88/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 89/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] ( 90/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] ( 91/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] ( 92/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] ( 93/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] ( 94/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] ( 95/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] ( 
96/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] ( 97/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] ( 98/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] ( 99/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (100/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (101/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (102/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (103/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (104/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (105/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] 
(106/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (107/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (108/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (109/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (110/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (111/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (112/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (113/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (114/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (115/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] 
(116/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (117/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (118/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (119/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (120/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (121/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (122/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (123/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (124/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (125/129)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (126/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] 
(127/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (128/129)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (129/129)\b\b\b\b\b\b\b\b\b\b\b" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 11:11:39.970\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:39.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 11:11:40.067\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 254 sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:40.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:40.101\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:40.102\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:40.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:40.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:41.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:42.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:43.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:44.572\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:45.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:11:45.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:46.715\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:47.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:48.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:49.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:50.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:51.543\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:52.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:53.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:54.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:54.403\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:54.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:12:54.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:13:55.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:13:56.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:13:57.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:13:58.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:13:58.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-02-04 11:13:58.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:13:59.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:14:00.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:14:00.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-02-04 11:14:00.654\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:14:01.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:14:02.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:14:03.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:14:03.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - 
\u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:15:04.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:15:05.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:15:06.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:15:08.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:15:09.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:15:10.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:15:11.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:15:12.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:15:13.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - 
"\u001b[32m2025-02-04 11:15:13.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:16:14.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:16:15.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:16:16.572\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:16:17.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:16:18.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:16:19.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:16:20.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:16:21.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:16:22.712\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:16:22.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:17:23.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:17:24.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:17:26.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:17:27.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:17:28.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:17:29.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:17:30.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:17:31.289\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:17:32.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:17:32.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:18:33.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:18:34.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:18:34.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:18:34.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:18:34.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:18:34.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n" - 
] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing CUDA_C_Programming_Guide.pdf.pdf...\n", - "[ ] (0/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 1/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 2/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 3/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 4/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 5/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 6/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 7/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 
8/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 9/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 10/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 11/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 12/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 13/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[ ] ( 14/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 15/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 16/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 
17/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 18/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 19/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 20/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 21/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 22/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 23/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 24/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 25/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 
26/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 27/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 28/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[= ] ( 29/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 30/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 31/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 32/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 33/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 34/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 
35/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 36/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 37/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 38/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 39/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 40/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 41/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 42/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[== ] ( 43/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 
44/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 45/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 46/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 47/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 48/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 49/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 50/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 51/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 52/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 
53/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 54/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 55/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 56/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 57/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 58/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 59/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 60/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 61/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 
62/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 63/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 64/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 65/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 66/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 67/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 68/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 69/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 70/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 
71/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==== ] ( 72/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 73/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 74/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 75/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 76/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 77/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 78/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 79/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 
80/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 81/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 82/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 83/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 84/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 85/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 86/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] ( 87/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 88/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 
89/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 90/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 91/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 92/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 93/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 94/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 95/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 96/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 97/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 
98/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 99/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] (100/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] (101/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (102/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (103/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (104/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (105/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (106/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] 
(107/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (108/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (109/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (110/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (111/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (112/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (113/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (114/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] (115/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======= ] 
(116/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (117/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (118/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (119/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (120/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (121/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (122/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (123/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (124/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] 
(125/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (126/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (127/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (128/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (129/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======== ] (130/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (131/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (132/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (133/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] 
(134/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (135/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (136/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (137/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (138/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (139/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (140/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (141/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (142/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] 
(143/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (144/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========= ] (145/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (146/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (147/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (148/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (149/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (150/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (151/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] 
(152/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (153/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (154/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (155/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (156/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (157/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (158/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (159/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (160/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] 
(161/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (162/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (163/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (164/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (165/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (166/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (167/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (168/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (169/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] 
(170/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (171/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (172/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (173/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========== ] (174/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (175/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (176/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (177/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (178/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] 
(179/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (180/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (181/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (182/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (183/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (184/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (185/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (186/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (187/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] 
(188/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============ ] (189/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (190/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (191/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (192/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (193/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (194/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (195/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (196/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] 
(197/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (198/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (199/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (200/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (201/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (202/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] (203/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (204/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (205/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] 
(206/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (207/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (208/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (209/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (210/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (211/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (212/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (213/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (214/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] 
(215/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (216/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (217/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============== ] (218/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (219/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (220/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (221/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (222/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (223/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] 
(224/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (225/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (226/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (227/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (228/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (229/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (230/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (231/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (232/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] 
(233/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (234/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (235/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (236/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (237/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (238/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (239/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (240/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (241/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] 
(242/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (243/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (244/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (245/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (246/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] (247/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (248/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (249/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (250/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] 
(251/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (252/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (253/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (254/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (255/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (256/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (257/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (258/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (259/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] (260/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================= ] 
(261/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (262/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (263/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (264/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (265/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (266/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (267/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (268/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (269/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (270/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] 
(271/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (272/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (273/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (274/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (275/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================== ] (276/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (277/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (278/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (279/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (280/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] 
(281/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (282/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (283/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (284/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (285/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (286/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (287/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (288/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (289/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================== ] (290/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] 
(291/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (292/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (293/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (294/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (295/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (296/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (297/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (298/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (299/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (300/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] 
(301/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (302/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (303/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (304/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (305/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (306/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (307/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (308/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (309/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (310/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] 
(311/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (312/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (313/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (314/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (315/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (316/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (317/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (318/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (319/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================== ] (320/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] 
(321/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (322/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (323/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (324/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (325/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (326/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (327/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (328/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (329/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (330/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] 
(331/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (332/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (333/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================== ] (334/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (335/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (336/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (337/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (338/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (339/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (340/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] 
(341/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (342/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (343/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (344/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (345/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (346/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (347/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (348/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] (349/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (350/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] 
(351/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (352/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (353/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (354/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (355/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (356/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (357/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (358/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (359/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (360/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] 
(361/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (362/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================== ] (363/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (364/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (365/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (366/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (367/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (368/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (369/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (370/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] 
(371/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (372/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (373/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (374/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (375/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (376/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (377/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] (378/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (379/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (380/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] 
(381/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (382/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (383/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (384/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (385/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (386/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (387/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (388/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (389/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (390/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] 
(391/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] (392/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (393/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (394/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (395/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (396/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (397/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (398/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (399/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (400/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] 
(401/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (402/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (403/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (404/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (405/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (406/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=========================== ] (407/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (408/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (409/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (410/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] 
(411/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (412/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (413/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (414/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (415/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (416/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (417/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (418/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (419/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] (420/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================ ] 
(421/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (422/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (423/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (424/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (425/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (426/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (427/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (428/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (429/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (430/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] 
(431/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (432/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (433/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (434/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (435/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================= ] (436/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (437/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (438/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (439/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (440/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] 
(441/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (442/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (443/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (444/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (445/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (446/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (447/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (448/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (449/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (450/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] 
(451/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (452/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (453/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (454/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (455/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (456/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (457/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (458/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (459/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (460/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] 
(461/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (462/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (463/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (464/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============================== ] (465/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (466/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (467/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (468/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (469/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (470/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] 
(471/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (472/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (473/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (474/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (475/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (476/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (477/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (478/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (479/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================ ] (480/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] 
(481/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (482/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (483/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (484/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (485/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (486/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (487/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (488/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (489/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (490/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] 
(491/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (492/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (493/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (494/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (495/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (496/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (497/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (498/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (499/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (500/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] 
(501/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (502/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (503/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (504/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (505/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (506/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (507/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (508/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================== ] (509/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (510/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] 
(511/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (512/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (513/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (514/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (515/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (516/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (517/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (518/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (519/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (520/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (521/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== 
] (522/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (523/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (524/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (525/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (526/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (527/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (528/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (529/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (530/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (531/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (532/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] 
(533/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (534/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (535/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (536/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (537/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (538/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (539/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (540/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (541/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (542/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (543/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] 
(544/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (545/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (546/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (547/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (548/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (549/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (550/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (551/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===================================== ] (552/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (553/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (554/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] 
(555/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (556/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (557/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (558/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (559/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (560/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (561/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (562/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (563/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (564/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (565/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] 
(566/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====================================== ] (567/582)\b\b\b\b\b\b\b\b\b\b\b\b\b=\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (568/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (569/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (570/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (571/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (572/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (573/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (574/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (575/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (576/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] 
(577/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (578/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (579/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (580/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================================= ] (581/582)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (582/582)\b\b\b\b\b\b\b\b\b\b\b]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 11:20:20.044\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 11:20:20.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:20:20.245\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 447 sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:20:20.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/CUDA_C_Programming_Guide.pdf\u001b[0m\n", - 
"\u001b[32m2025-02-04 11:20:20.318\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 11:20:20.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 11:20:20.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-02-04 11:20:20.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:30.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:31.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:32.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:33.796\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 1024\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:33.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional 
index?\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:33.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:34.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:35.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:35.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. 
-C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:35.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:36.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:22:36.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:38.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:38.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. 
-C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:38.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:39.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:40.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:40.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:40.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:41.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:42.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:43.656\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:44.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:45.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:46.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:46.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. 
-C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:46.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:23:46.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:48.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:49.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:50.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:51.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:52.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:53.656\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:53.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to 
graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:53.663\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:54.864\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:55.833\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:55.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:55.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:57.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:24:57.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:25:58.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:25:58.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. 
-C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-02-04 11:25:58.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:25:59.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:00.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: B\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:00.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:00.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:01.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:02.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:02.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? 
-A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:02.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:03.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:04.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:04.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:04.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:06.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:07.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:07.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document 
https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:07.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:07.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:07.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing 7DUME_EN01_Rules.pdf.pdf...\n", - "[ ] (0/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[===== ] (1/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] (2/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=============== ] (3/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] (4/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================= ] 
(5/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] (6/8)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=================================== ] (7/8)\b\b\b\b\b\b\b\b\b\b\b\b=====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (8/8)\b\b\b\b\b\b\b" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 11:26:16.222\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:16.224\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:16.231\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 25 sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:16.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:16.246\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:16.250\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:16.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:16.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:26:16.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 11:27:17.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:18.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:18.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:18.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:19.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - 
\u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:20.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:20.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:20.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:21.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:22.412\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:23.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:24.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:25.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:27:25.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - 
"\u001b[32m2025-02-04 11:28:26.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:28:27.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:28:28.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:28:29.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:28:30.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:28:31.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:28:32.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:28:33.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:28:34.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:28:34.380\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:29:35.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:29:36.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:29:37.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:29:38.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:29:39.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:29:40.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:29:41.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:29:42.611\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:29:43.581\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:29:43.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:30:44.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:30:45.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:30:46.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:30:47.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:30:48.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:30:49.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:30:50.797\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:30:51.994\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:30:53.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:30:53.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:31:54.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:31:55.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:31:56.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:31:57.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:31:58.752\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:31:59.801\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 6\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:31:59.804\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-02-04 11:31:59.807\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:32:00.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:32:02.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:32:02.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-02-04 11:32:02.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:32:03.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:32:03.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:04.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:04.481\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:04.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:05.553\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:06.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:07.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:08.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:09.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:11.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:12.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:13.071\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:33:13.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:14.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:15.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:16.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:17.933\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 3\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:17.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:17.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:19.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:20.187\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:20.189\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:20.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:21.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:22.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:23.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:34:23.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:24.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:26.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:27.148\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:28.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:29.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:29.266\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:29.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:30.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:31.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 2\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:31.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:31.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 
11:35:32.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:33.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:35:33.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:34.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:35.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:36.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:37.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:38.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:40.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:41.130\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:42.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:42.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:42.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:43.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:36:43.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:37:44.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:37:45.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:37:46.920\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:37:48.092\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:37:49.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:37:50.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:37:51.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:37:52.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:37:53.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:37:53.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:38:54.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:38:55.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:38:56.989\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:38:57.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:38:59.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:39:00.181\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:39:01.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:39:02.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:39:03.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:39:03.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:40:04.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:40:05.991\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:40:07.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:40:08.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:40:09.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:40:11.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:40:12.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:40:13.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:40:14.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:40:14.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:41:15.946\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:41:17.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:41:18.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:41:19.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:41:20.308\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:41:21.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:41:22.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:41:23.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:41:24.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:41:24.290\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:25.615\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:26.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:26.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:26.872\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:28.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:29.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:30.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:31.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:32.551\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:33.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:34.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:42:34.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:43:36.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:43:37.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:43:39.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:43:40.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:43:42.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:43:43.573\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:43:44.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:43:46.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:43:47.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:43:47.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:44:49.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:44:50.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:44:52.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:44:53.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:44:54.738\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:44:56.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:44:57.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:44:58.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:45:00.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:45:00.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:01.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:02.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:03.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: C\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:03.875\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:03.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:05.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:06.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:07.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:09.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:10.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:11.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:46:11.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:47:12.667\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:47:14.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:47:15.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:47:17.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:47:18.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:47:19.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:47:20.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:47:21.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:47:22.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:47:22.117\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:48:23.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:48:24.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:48:25.654\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:48:26.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:48:28.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:48:29.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:48:30.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:48:31.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:48:32.581\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:48:32.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:49:33.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:49:35.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:49:36.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:49:37.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:49:38.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:49:39.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:49:40.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:49:42.358\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:49:43.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:49:43.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:44.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:46.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:47.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:47.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:47.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:48.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:49.946\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:49.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:49.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:51.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:52.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:53.789\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:55.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 7\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:55.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:55.498\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to 
is_eotn_rulebook.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:55.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mSplitting document into sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:50:55.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mConverting is_eotn_rulebook.pdf.pdf\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing is_eotn_rulebook.pdf.pdf...\n", - "[ ] (0/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[=== ] ( 1/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[====== ] ( 2/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========== ] ( 3/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============= ] ( 4/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================ ] ( 5/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================== ] ( 
6/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[======================= ] ( 7/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================== ] ( 8/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[============================== ] ( 9/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[================================= ] (10/12)\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b===\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[==================================== ] (11/12)\b\b\b\b\b\b\b\b\b\b\b\b\b====\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[========================================] (12/12)\b\b\b\b\b\b\b\b\b" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 11:51:02.798\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m88\u001b[0m - \u001b[32m\u001b[1mConverted\u001b[0m\n", - "\u001b[32m2025-02-04 11:51:02.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m90\u001b[0m - \u001b[1mExtracting sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:51:02.808\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m94\u001b[0m - \u001b[32m\u001b[1mFound 40 sections\u001b[0m\n", - "\u001b[32m2025-02-04 11:51:02.811\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m95\u001b[0m - \u001b[1mWriting sections to sections/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 11:51:02.819\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstructured_qa.preprocessing\u001b[0m:\u001b[36mdocument_to_sections_dir\u001b[0m:\u001b[36m103\u001b[0m - \u001b[32m\u001b[1mDone\u001b[0m\n", - "\u001b[32m2025-02-04 11:51:02.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 11:51:02.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-02-04 11:51:02.828\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:51:02.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 11:52:04.181\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:52:05.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:52:06.424\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:52:07.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:52:08.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:52:09.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:52:10.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:52:11.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:52:13.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:52:13.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:53:14.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:53:15.874\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:53:16.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:53:17.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:53:19.412\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:53:20.660\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:53:21.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:53:22.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:53:23.869\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:53:23.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:54:25.296\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:54:26.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:54:27.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:54:28.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:54:29.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:54:30.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:54:31.818\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:54:32.786\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:54:33.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:54:33.909\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:55:35.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:55:36.308\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:55:37.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:55:38.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:55:39.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:55:40.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:55:41.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:55:42.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:55:43.847\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:55:43.849\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:45.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:46.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:47.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:47.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:47.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:48.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:50.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:51.065\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:52.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:53.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:54.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:56:54.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:57:55.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:57:57.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:57:58.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:57:59.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:58:00.568\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:58:01.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:58:03.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:58:04.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:58:04.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-02-04 11:58:04.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:58:05.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:58:05.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:06.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:06.914\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:06.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:08.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:09.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:10.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:11.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:12.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:13.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:15.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:16.519\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 25\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:16.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:16.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 11:59:16.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:00:17.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:00:18.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 12:00:18.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-02-04 12:00:18.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:00:20.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 
12:00:21.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 12:00:22.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 12:00:23.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 12:00:25.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 12:00:26.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 12:00:27.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 12:00:27.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:01:29.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:01:30.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:01:31.504\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 12:01:32.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 12:01:34.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 12:01:35.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 12:01:36.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 12:01:37.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 12:01:39.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 12:01:39.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:02:40.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:02:41.568\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:02:42.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 12:02:43.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 12:02:44.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 12:02:46.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 12:02:47.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 12:02:48.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 12:02:49.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 12:02:49.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:03:51.703\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:03:52.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:03:54.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 12:03:55.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 12:03:56.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 12:03:57.791\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 12:03:59.012\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 12:04:00.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 12:04:01.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 12:04:01.132\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:02.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:03.728\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:05.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:05.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:07.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:07.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:07.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:08.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:09.792\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:11.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:12.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 12:05:12.339\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:06:13.512\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:06:14.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:06:16.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 12:06:17.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 12:06:18.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 12:06:19.216\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 12:06:20.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 12:06:21.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 12:06:22.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 12:06:22.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:07:24.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:07:25.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:07:26.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 12:07:27.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 12:07:28.990\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 12:07:29.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 12:07:31.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 12:07:32.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 12:07:33.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 12:07:33.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:08:34.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:08:36.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:08:37.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 12:08:38.514\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 12:08:39.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 12:08:40.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 12:08:41.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 12:08:42.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 12:08:44.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 12:08:44.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:45.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:46.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:47.284\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:48.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:49.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:50.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:51.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:52.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NOT FOUND\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:52.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:52.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:54.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 12:09:54.446\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:10:56.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 12:10:56.123\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-02-04 12:10:56.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:10:57.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:10:58.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 12:10:59.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 12:11:01.518\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 12:11:02.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 12:11:04.065\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 12:11:05.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 12:11:06.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: 1\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 12:11:06.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-02-04 12:11:06.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 12:11:06.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:08.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:09.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:11.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 3\u001b[0m\n", - "\u001b[32m2025-02-04 
12:12:12.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:12.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:12.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 4\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:13.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 5\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:14.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: NO\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:14.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:14.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 6\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:16.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 7\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:17.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: YES\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:17.516\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m42\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:17.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 8\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:19.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 9\u001b[0m\n", - "\u001b[32m2025-02-04 12:12:19.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 12:13:20.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 1\u001b[0m\n", - "\u001b[32m2025-02-04 12:13:21.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mstructured_qa.model_loaders\u001b[0m:\u001b[36mget_response\u001b[0m:\u001b[36m96\u001b[0m - \u001b[1mCurrent calls: 2\u001b[0m\n", - "\u001b[32m2025-02-04 12:13:22.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m46\u001b[0m - \u001b[1mAnswer: A\n", - "\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "from pathlib import Path\n", "from urllib.request import urlretrieve\n", diff --git a/benchmark/gemini_full_context.ipynb b/benchmark/gemini_full_context.ipynb index 0e11127..67c1c30 100644 --- a/benchmark/gemini_full_context.ipynb +++ b/benchmark/gemini_full_context.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": 
{ "base_uri": "https://localhost:8080/" @@ -46,86 +46,14 @@ "id": "QrgOGtuGlyhT", "outputId": "f1657f11-fbbb-4323-b7e7-6d68bcb2e139" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-uq1w5jgv\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-uq1w5jgv\n", - " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", - " Switched to a new branch '5-add-benchmark'\n", - " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 97049d67d83ec6129569d442bd365c7a5e490578\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.7.0)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.27.1)\n", - "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.7.3)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (2.10.6)\n", - "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.0.17)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (6.0.2)\n", - "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (3.12.1)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (1.41.1)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev111+g97049d6) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2.32.3)\n", - "Requirement 
already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev111+g97049d6) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev111+g97049d6) (2.27.2)\n", - "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev111+g97049d6) (1.25.2)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (11.1.0)\n", - "Requirement already satisfied: 
protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.10.2)\n", - "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (6.0.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.1.44)\n", - "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.9.1)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (6.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.24.1)\n", - "Requirement already satisfied: 
gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.0.2)\n", - 
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (25.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.17.0)\n" - ] - } - ], + "outputs": [], "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + "%pip install --quiet structured-qa" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -133,27 +61,9 @@ "id": "S22kTrfPlyhU", "outputId": "5aad43fa-5177-4bb2-e400-6aa364bfa7f2" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2025-02-04 13:59:31-- 
https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.111.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 23304 (23K) [text/plain]\n", - "Saving to: ‘structured_qa.csv.1’\n", - "\n", - "\rstructured_qa.csv.1 0%[ ] 0 --.-KB/s \rstructured_qa.csv.1 100%[===================>] 22.76K --.-KB/s in 0.002s \n", - "\n", - "2025-02-04 13:59:32 (10.6 MB/s) - ‘structured_qa.csv.1’ saved [23304/23304]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/main/benchmark/structured_qa.csv" ] }, { @@ -212,7 +122,6 @@ }, "outputs": [], "source": [ - "import json\n", "import time\n", "\n", "\n", @@ -319,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -328,281 +237,7 @@ "id": "AZBwRnfjlyhZ", "outputId": "9ad4c6b4-063a-4ae6-b802-9e7df4d265fd" }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\u001b[32m2025-02-04 13:59:35.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-02-04 13:59:35.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 13:59:35.096\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m19\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-02-04 13:59:35.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 13:59:37.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 13:59:37.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-02-04 14:00:37.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:00:37.507\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", - "\u001b[32m2025-02-04 14:01:40.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", - "\u001b[32m2025-02-04 14:01:40.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? 
-A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-02-04 14:02:42.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:02:42.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-02-04 14:03:41.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", - "\u001b[32m2025-02-04 14:03:41.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. 
-C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-02-04 14:04:44.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:04:44.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. -C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-02-04 14:05:44.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:05:44.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-02-04 14:06:46.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m25\u001b[0m\n", - "\u001b[32m2025-02-04 14:06:46.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. 
-C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-02-04 14:07:50.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:07:50.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-02-04 14:08:50.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:08:50.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 14:09:50.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-02-04 14:10:50.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", - "\u001b[32m2025-02-04 14:10:50.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-02-04 14:10:51.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:10:51.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 14:10:52.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 14:10:52.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-02-04 14:10:56.663\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", - "\u001b[32m2025-02-04 14:10:56.665\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:00.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m6\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:00.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:03.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m6\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:03.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:06.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m8\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:06.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:10.452\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:10.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:13.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:13.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:17.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m8\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:17.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:21.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:11:21.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:21.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:24.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:24.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:28.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m4000\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:28.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:31.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m0.1\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:31.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:31.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:31.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:33.204\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:33.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:39.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:39.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:45.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:45.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:51.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m175\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:51.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:57.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:57.970\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:58.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:58.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:59.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 14:12:59.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-02-04 14:13:05.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:13:05.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-02-04 14:13:12.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mFive\u001b[0m\n", - "\u001b[32m2025-02-04 14:13:12.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:12.085\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:18.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m5\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:18.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:26.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:26.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:33.463\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:33.465\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? 
-A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:39.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:39.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:46.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:46.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:53.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m50\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:53.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:53.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:53.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:54.384\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:54.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:57.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:57.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:59.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", - "\u001b[32m2025-02-04 14:14:59.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-02-04 14:15:02.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:15:02.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:02.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - 
"\u001b[32m2025-02-04 14:16:04.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m14\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:04.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:07.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:07.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:09.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:09.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2302.13971\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:09.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:09.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:11.296\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:11.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? -A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:16.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:16.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:22.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNumber\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:22.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:27.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:27.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:33.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m0.1\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:33.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:38.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m20\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:38.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:43.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:16:43.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 14:17:43.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist 
in training data?\u001b[0m\n", - "\u001b[32m2025-02-04 14:17:49.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:17:49.267\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", - "\u001b[32m2025-02-04 14:17:54.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:17:54.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:00.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:00.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:01.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:01.275\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:02.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:02.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:11.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:11.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:18.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:18.682\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? 
-A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:26.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:26.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:34.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:34.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:42.672\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:42.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:43.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:43.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:44.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:44.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:54.386\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:18:54.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 14:19:54.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:04.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:04.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:13.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:13.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:30.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 
14:20:30.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:39.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m2015\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:39.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:49.656\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:49.662\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:50.562\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:50.564\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 
14:20:51.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 14:20:51.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:03.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:03.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:15.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:15.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:28.807\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:28.809\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? 
-A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:40.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:40.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:40.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:40.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:42.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 14:21:42.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 14:22:42.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-02-04 14:23:28.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m1024\u001b[0m\n", - "\u001b[32m2025-02-04 14:23:28.661\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-02-04 14:24:13.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mI need more info\u001b[0m\n", - "\u001b[32m2025-02-04 14:24:13.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-02-04 14:24:56.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m-B: It is transformed into assembly and/or binary form.\n", - "\u001b[0m\n", - "\u001b[32m2025-02-04 14:24:56.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. 
-C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-02-04 14:25:39.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:25:39.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-02-04 14:26:43.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m-C\u001b[0m\n", - "\u001b[32m2025-02-04 14:26:43.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-02-04 14:27:49.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:27:49.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. 
-C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-02-04 14:28:38.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", - "\u001b[32m2025-02-04 14:28:38.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-02-04 14:29:23.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:29:23.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-02-04 14:30:13.262\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:30:13.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 14:31:13.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? 
-A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-02-04 14:32:03.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", - "\u001b[32m2025-02-04 14:32:03.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-02-04 14:32:53.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:32:53.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-02-04 14:33:38.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:33:38.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:33:38.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded 
https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:33:38.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 14:33:40.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 14:33:40.266\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-02-04 14:33:48.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", - "\u001b[32m2025-02-04 14:33:48.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-02-04 14:33:56.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", - "\u001b[32m2025-02-04 14:33:56.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-02-04 14:34:04.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m6\u001b[0m\n", - "\u001b[32m2025-02-04 14:34:04.049\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which player begins the game? -A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-02-04 14:34:11.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n", - "\u001b[32m2025-02-04 14:34:11.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-02-04 14:34:19.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:34:19.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-02-04 14:34:27.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m3\u001b[0m\n", - "\u001b[32m2025-02-04 14:34:27.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 14:35:27.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-02-04 14:35:34.438\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:35:34.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-02-04 14:35:43.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:35:43.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-02-04 14:35:51.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m2\u001b[0m\n", - "\u001b[32m2025-02-04 14:35:51.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:00.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:00.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:07.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mB\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:07.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:16.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mC\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:16.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:24.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:24.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:31.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:31.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:38.647\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m7\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:38.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:39.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:39.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mUploading file\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:40.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m17\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 14:36:40.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 14:37:40.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-02-04 14:37:46.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m4\u001b[0m\n", - "\u001b[32m2025-02-04 14:37:46.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - 
\u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-02-04 14:37:52.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:37:52.215\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-02-04 14:37:57.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:37:57.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:02.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m25\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:02.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:07.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:07.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant 
island?\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:12.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:12.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:18.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m1\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:18.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:23.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:23.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:28.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1m1\u001b[0m\n", - "\u001b[32m2025-02-04 14:38:28.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 14:39:28.647\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-02-04 14:39:34.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:39:34.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-02-04 14:39:39.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 14:39:39.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 14:39:44.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 14:39:44.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-02-04 14:39:49.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document_questions\u001b[0m:\u001b[36m32\u001b[0m - \u001b[1mA\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "from pathlib import Path\n", "from urllib.request import urlretrieve\n", @@ -657,49 +292,11 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Unnamed: 0 document \\\n", - "26 26 https://authorsalliance.org/wp-content/uploads... \n", - "28 28 https://arxiv.org/pdf/2201.11903 \n", - "34 34 https://arxiv.org/pdf/2201.11903 \n", - "68 68 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "90 90 https://arxiv.org/pdf/2302.13971 \n", - "\n", - " type \\\n", - "26 Techincal Documentation \n", - "28 Scientific Report \n", - "34 Scientific Report \n", - "68 Techincal Documentation \n", - "78 Techincal Documentation \n", - "90 Scientific Report \n", - "\n", - " section \\\n", - "26 CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK... \n", - "28 3.1 Experimental Setup \n", - "34 3.2 Results \n", - "68 5.2. Thread Hierarchy \n", - "78 23.1. What is Lazy Loading? \n", - "90 2.1 Pre-training Data \n", - "\n", - " question answer \\\n", - "26 Are Gold Open Access and Green Open Access mut... NO \n", - "28 How many large language models were evaluated? 5 \n", - "34 How many random samples were examined to under... 100 \n", - "68 Can you identify a thread with a four-dimensio... NO \n", - "78 Can you enable lazy loading by setting the env... NO \n", - "90 How many languages did the Wikipedia data cover? 
20 \n", - "\n", - " pred_answer pred_section \n", - "26 YES NaN \n", - "28 FIVE NaN \n", - "34 50 NaN \n", - "68 I NEED MORE INFO NaN \n", - "78 YES NaN \n", - "90 NUMBER NaN " - ], + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28,\n \"min\": 26,\n \"max\": 90,\n \"num_unique_values\": 6,\n \"samples\": [\n 26,\n 28,\n 90\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/2302.13971\",\n \"https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Scientific Report\",\n \"Techincal Documentation\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK AVAILABLE?\",\n \"3.1 Experimental Setup\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Are Gold Open Access and Green Open Access mutually exclusive.\",\n \"How many large language models were evaluated?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"5\",\n \"20\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n 
\"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"FIVE\",\n \"NUMBER\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, "text/html": [ "\n", "
\n", @@ -1011,13 +608,51 @@ "
\n", "
\n" ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"results\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28,\n \"min\": 26,\n \"max\": 90,\n \"num_unique_values\": 6,\n \"samples\": [\n 26,\n 28,\n 90\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/2302.13971\",\n \"https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Scientific Report\",\n \"Techincal Documentation\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK AVAILABLE?\",\n \"3.1 Experimental Setup\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Are Gold Open Access and Green Open Access mutually exclusive.\",\n \"How many large language models were evaluated?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"5\",\n \"20\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"FIVE\",\n \"NUMBER\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } + "text/plain": [ + " Unnamed: 0 document \\\n", + "26 26 https://authorsalliance.org/wp-content/uploads... \n", + "28 28 https://arxiv.org/pdf/2201.11903 \n", + "34 34 https://arxiv.org/pdf/2201.11903 \n", + "68 68 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "90 90 https://arxiv.org/pdf/2302.13971 \n", + "\n", + " type \\\n", + "26 Techincal Documentation \n", + "28 Scientific Report \n", + "34 Scientific Report \n", + "68 Techincal Documentation \n", + "78 Techincal Documentation \n", + "90 Scientific Report \n", + "\n", + " section \\\n", + "26 CHAPTER 5: WHERE DO YOU WANT TO MAKE YOUR WORK... \n", + "28 3.1 Experimental Setup \n", + "34 3.2 Results \n", + "68 5.2. Thread Hierarchy \n", + "78 23.1. What is Lazy Loading? \n", + "90 2.1 Pre-training Data \n", + "\n", + " question answer \\\n", + "26 Are Gold Open Access and Green Open Access mut... NO \n", + "28 How many large language models were evaluated? 5 \n", + "34 How many random samples were examined to under... 100 \n", + "68 Can you identify a thread with a four-dimensio... NO \n", + "78 Can you enable lazy loading by setting the env... NO \n", + "90 How many languages did the Wikipedia data cover? 
20 \n", + "\n", + " pred_answer pred_section \n", + "26 YES NaN \n", + "28 FIVE NaN \n", + "34 50 NaN \n", + "68 I NEED MORE INFO NaN \n", + "78 YES NaN \n", + "90 NUMBER NaN " + ] }, + "execution_count": 12, "metadata": {}, - "execution_count": 12 + "output_type": "execute_result" } ], "source": [ @@ -1042,29 +677,20 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "0.941747572815534" ] }, + "execution_count": 13, "metadata": {}, - "execution_count": 13 + "output_type": "execute_result" } ], "source": [ "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", "accuracy" ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "rjMNQp8-sZn9" - }, - "outputs": [], - "source": [] } ], "metadata": { @@ -1083,4 +709,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/benchmark/gemini_perfect_context.ipynb b/benchmark/gemini_perfect_context.ipynb index e65e98a..cb060cb 100644 --- a/benchmark/gemini_perfect_context.ipynb +++ b/benchmark/gemini_perfect_context.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -46,22 +46,14 @@ "id": "QrgOGtuGlyhT", "outputId": "ef47af4b-6bee-4dda-c559-e4e5fee1c54b" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "fatal: destination path 'structured-qa' already exists and is not an empty directory.\n" - ] - } - ], + "outputs": [], "source": [ - "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" + "!git clone https://github.com/mozilla-ai/structured-qa" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -70,98 +62,7 @@ "id": "S22kTrfPlyhU", "outputId": "ae9617bf-00f7-4b50-d59d-538b285f4eb5" }, - "outputs": [ - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "Processing ./structured-qa\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.7.0)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.27.1)\n", - "Requirement already satisfied: loguru in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.7.3)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (2.10.6)\n", - "Requirement already satisfied: pymupdf4llm in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (0.0.17)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (6.0.2)\n", - "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (3.12.1)\n", - "Requirement already satisfied: streamlit in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev111+g97049d6) (1.41.1)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev111+g97049d6) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in 
/usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev111+g97049d6) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev111+g97049d6) (2.27.2)\n", - "Requirement already satisfied: pymupdf>=1.24.10 in /usr/local/lib/python3.11/dist-packages (from pymupdf4llm->structured-qa==0.3.3.dev111+g97049d6) (1.25.2)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.10.2)\n", - "Requirement already satisfied: watchdog<7,>=2.1.5 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (6.0.0)\n", - "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.1.44)\n", - "Requirement already satisfied: pydeck<1,>=0.8.0b4 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.9.1)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev111+g97049d6) (6.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in 
/usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.24.1)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev111+g97049d6) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev111+g97049d6) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in 
/usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev111+g97049d6) (5.0.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (25.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev111+g97049d6) (1.17.0)\n", - "Building wheels for collected packages: structured-qa\n", - " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev111+g97049d6-py3-none-any.whl size=13247 sha256=a18780844c04a51ee112c6177e9ed610585c15d00f2e5f2dfefa1dcd4d27f151\n", - " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", - "Successfully built structured-qa\n", - "Installing collected packages: structured-qa\n", - " Attempting uninstall: structured-qa\n", - " Found existing installation: structured-qa 0.3.3.dev111+g97049d6\n", - " Uninstalling structured-qa-0.3.3.dev111+g97049d6:\n", - " Successfully uninstalled structured-qa-0.3.3.dev111+g97049d6\n", - "Successfully installed structured-qa-0.3.3.dev111+g97049d6\n" - ] - }, - { - "data": { - "application/vnd.colab-display-data+json": { - "id": "9e4f0553c0ad49d7a1731d9def243c03", - "pip_warning": { - "packages": [ - "structured_qa" - ] - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "%pip install ./structured-qa" ] @@ -222,7 +123,6 @@ }, "outputs": [], "source": [ - "import json\n", "import time\n", "\n", "\n", @@ -316,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -325,185 +225,7 @@ "id": "AZBwRnfjlyhZ", "outputId": "9d34f3ea-1b9a-40e2-a6ac-9d1d6c00d6c6" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-02-04 15:05:23.818\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:23.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:23.838\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:25.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:26.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:26.765\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:27.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:27.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. -C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:29.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. 
-C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:30.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:32.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. -C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:33.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:33.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:34.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:36.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:36.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? -A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:37.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:05:37.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:37.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:39.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:40.662\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:40.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:41.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:41.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. 
-C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:43.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:44.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:44.563\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:46.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:46.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:47.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:49.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:50.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - 
\u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-02-04 15:06:51.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 15:07:51.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-02-04 15:07:53.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:07:53.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", - "\u001b[32m2025-02-04 15:07:54.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", - "\u001b[32m2025-02-04 15:07:55.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:07:55.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? 
-A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-02-04 15:07:57.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:07:57.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-02-04 15:07:59.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-02-04 15:08:00.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:08:00.736\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-02-04 15:08:02.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-02-04 15:08:04.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:08:04.266\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-02-04 15:08:05.539\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:08:05.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-02-04 15:08:07.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:08:07.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:07.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:08.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:09.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:10.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:10.609\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does the final model use learned 
positional embeddings?\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:11.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:11.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:13.204\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:13.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? -A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:14.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:14.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:15.851\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:17.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - 
"\u001b[32m2025-02-04 15:09:17.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:19.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:20.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:22.178\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:09:22.180\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:22.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:24.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:24.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - 
\u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:26.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:27.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:27.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:28.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional index?\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:30.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:30.215\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? 
-A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:31.461\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:33.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:33.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:34.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:34.920\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:36.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. 
-C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-02-04 15:10:37.767\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:37.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. -C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:39.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. 
-C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:40.539\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:40.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:41.811\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:43.033\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:43.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:44.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. 
-B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. -C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:45.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:45.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:46.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:46.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:48.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:49.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:11:49.550\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - 
"\u001b[32m2025-02-04 15:11:51.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 15:12:51.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-02-04 15:12:52.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-02-04 15:12:54.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-02-04 15:12:55.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:12:55.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-02-04 15:12:56.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:12:56.572\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-02-04 15:12:57.793\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-02-04 15:12:59.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-02-04 15:13:00.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-02-04 15:13:01.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:13:01.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 15:13:03.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:13:03.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-02-04 15:13:04.384\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:13:04.386\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:04.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:05.789\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:07.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:07.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:08.737\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:10.462\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:11.684\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:11.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:13.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. -C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:15.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:15.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:16.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? 
-A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:17.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:17.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:18.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:14:18.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:18.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:20.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:21.864\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:21.865\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:23.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:24.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:24.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:26.186\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:27.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:27.461\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:29.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:29.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:30.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:32.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:32.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:33.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:15:33.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:33.695\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:35.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:37.024\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:37.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? -A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:38.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:38.506\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:40.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:40.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. 
-B: Privacy and data governance risks. -C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:41.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:42.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:42.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? -A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:44.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:45.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:45.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. 
-C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:47.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. -B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:48.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:16:48.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mWaiting for 60 seconds\u001b[0m\n", - "\u001b[32m2025-02-04 15:17:48.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-02-04 15:17:50.811\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. 
-C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-02-04 15:17:52.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 15:17:52.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "from pathlib import Path\n", "\n", diff --git a/benchmark/qwen_2_5_7B_RAGatouille.ipynb b/benchmark/qwen_2_5_7B_RAGatouille.ipynb index 43a01b6..3cb2ce1 100644 --- a/benchmark/qwen_2_5_7B_RAGatouille.ipynb +++ b/benchmark/qwen_2_5_7B_RAGatouille.ipynb @@ -63,8 +63,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "GPU is available!\n" ] @@ -90,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -98,24 +98,14 @@ "id": "wLom5F1VEjYi", "outputId": "f3ec2127-e687-47c8-d48b-8932d41e1342" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h" - ] - } - ], + "outputs": [], "source": [ "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": { 
"base_uri": "https://localhost:8080/" @@ -123,94 +113,14 @@ "id": "P1eAychVq3my", "outputId": "83e7fe0f-26bc-4c68-d325-cf18752d2888" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/86.7 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.7/86.7 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m490.9/490.9 kB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m232.6/232.6 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m30.7/30.7 MB\u001b[0m \u001b[31m59.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.0/16.0 MB\u001b[0m \u001b[31m86.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m67.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m52.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m48.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m85.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m85.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m68.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m286.1/286.1 kB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m36.8 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m422.9/422.9 kB\u001b[0m \u001b[31m31.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m250.6/250.6 kB\u001b[0m \u001b[31m18.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.4/44.4 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.7/298.7 kB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m66.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.9/50.9 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Building wheel for colbert-ai (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0m" - ] - } - ], - "source": [ - "%pip install --quiet ragatouille PyPDF2" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "I0dl5xGnq3my", - "outputId": "411d0bee-ffd2-46ec-e1c1-5208c78a5b4d" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m57.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m113.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m104.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m87.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" - ] - } - ], + "outputs": [], "source": [ - "%pip install --quiet git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" + "%pip install --quiet ragatouille PyPDF2 structured-qa" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -218,27 +128,9 @@ "id": "Nl_haxghq3mz", "outputId": "0b7de590-3202-4ca9-fc33-39d447d1379a" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2025-02-04 18:45:44-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 23304 (23K) [text/plain]\n", - "Saving to: ‘structured_qa.csv’\n", - "\n", - "structured_qa.csv 100%[===================>] 22.76K --.-KB/s in 0.001s \n", - "\n", - "2025-02-04 18:45:45 (15.7 MB/s) - ‘structured_qa.csv’ saved [23304/23304]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/main/benchmark/structured_qa.csv" ] }, { @@ -393,7 +285,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -415,34 +307,7 @@ "id": "cMBl2dxLq3m0", "outputId": "22cf0c12-4d4c-40fa-911c-f66a70188ff5" }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", - "The secret `HF_TOKEN` does not exist 
in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Qwen2.5-7B-Instruct-Q8_0.gguf: 0%| | 0.00/8.10G [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-02-04 18:50:10.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:50:11.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf to HAI_AI-Index-Report-2024.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:50:11.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "artifact.metadata: 0%| | 0.00/1.63k [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-02-04 18:51:19.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:51:19.823\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 56 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:51:32.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:51:32.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 137 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-02-04 18:51:45.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:51:45.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 199 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/7 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-02-04 18:51:55.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:51:55.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 44 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/2 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2302.13971\u001b[0m\n", - "\u001b[32m2025-02-04 18:52:01.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:52:01.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 144 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:52:11.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:52:11.915\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 168 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/6 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:52:20.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:52:20.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 143 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/5 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:52:28.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:52:28.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 364 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/12 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:52:39.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:52:39.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 1803 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\r 0%| | 0/57 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:18.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:18.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 17 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/1 [00:00\u001b[0m:\u001b[36m11\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:32.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:32.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mSetting up RAG\u001b[0m\n", - "/usr/local/lib/python3.11/dist-packages/colbert/utils/amp.py:12: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. 
Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Encoding 48 documents...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 0%| | 0/2 [00:00Discard any remaining, face-up Island cards and reveal new ones.\n", - " >Pass the First player marker to \u001b[0m\n", - "\u001b[32m2025-02-04 18:53:37.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1m25\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:37.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:37.543\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1m>There is no limit to the number, type, or order of \n", - "actions a player may take during the Action pha\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:38.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:38.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:38.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mRations are needed for the long journey!\n", - "A player can choose to Pillage a selected Island card with\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:39.250\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:39.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:39.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mEach \n", - "action draws the clans closer to becoming the greatest empire! The \n", - "game ends in the same roun\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:40.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mI need more info\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:40.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:40.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mGAME FLOW\n", - "Note for Imperial Settlers fans \n", - "You cannot Spend 2 Workers \n", - "to get a Resource or a card.\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:41.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:41.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:41.156\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mIMPORT ANT: Some Field Locations work only as upgrades. These Fields have \n", - "the Resources on the righ\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:42.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1m1\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:42.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:42.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mNOTE 2: Some abilities in the \n", - "game have a ‘/’ divider between \n", - "presented choices. This should be \n", - "t\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:42.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:42.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:43.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mThus allowing a player to play \n", - "a single Boost card or build a single Field Location before resolvin\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:43.886\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mNO\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:43.888\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:43.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", - "(Gold tokens assigned to cards are\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:44.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mYES\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:44.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:44.807\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1m>add 1 Victory Point for every 1 Gold remaining in their supply \n", - "(Gold tokens assigned to cards are\u001b[0m\n", - "\u001b[32m2025-02-04 18:53:45.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mC\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "for document_link, document_data in data.groupby(\"document\"):\n", - " logger.info(f\"Downloading document {document_link}\")\n", - " downloaded_document = 
Path(f\"{Path(document_link).name}.pdf\")\n", - " if not Path(downloaded_document).exists():\n", - " urlretrieve(document_link, downloaded_document)\n", - " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", - " else:\n", - " logger.info(f\"File {downloaded_document} already exists\")\n", - "\n", - " answers, sections = process_document(downloaded_document, document_data, model)\n", - "\n", - " for index in document_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "mltqL7Bhq3m1", - "outputId": "03955779-750d-4a3c-d0bd-b1e1ec9bf5e8" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " Unnamed: 0 document \\\n", - "2 2 https://arxiv.org/pdf/1706.03762 \n", - "10 10 https://arxiv.org/pdf/1706.03762 \n", - "28 28 https://arxiv.org/pdf/2201.11903 \n", - "32 32 https://arxiv.org/pdf/2201.11903 \n", - "34 34 https://arxiv.org/pdf/2201.11903 \n", - "37 37 https://github.com/mozilla-ai/structured-qa/re... \n", - "41 41 https://github.com/mozilla-ai/structured-qa/re... \n", - "42 42 https://github.com/mozilla-ai/structured-qa/re... \n", - "45 45 https://github.com/mozilla-ai/structured-qa/re... \n", - "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", - "56 56 https://github.com/mozilla-ai/structured-qa/re... \n", - "62 62 https://github.com/mozilla-ai/structured-qa/re... \n", - "68 68 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "90 90 https://arxiv.org/pdf/2302.13971 \n", - "94 94 https://arxiv.org/pdf/2302.13971 \n", - "98 98 https://assets.publishing.service.gov.uk/media... \n", - "100 100 https://assets.publishing.service.gov.uk/media... 
\n", + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 17,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29,\n \"min\": 2,\n \"max\": 100,\n \"num_unique_values\": 17,\n \"samples\": [\n 2,\n 10,\n 37\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/2302.13971\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Scientific Report\",\n \"Regulation\",\n \"Board Game\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"GAME END\",\n \"2.1 Pre-training Data\",\n \"3.1 Encoder and Decoder Stacks\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 17,\n \"samples\": [\n \"How many layers compose the decoder?\",\n \"What was the dropout rate used for the base model?\",\n \"How many different races are there?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"NO\",\n \"6\",\n \"20\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"C\",\n \"PDROP= 0.1\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ "\n", - " type section \\\n", - "2 Scientific Paper 3.1 Encoder and Decoder Stacks \n", - "10 Scientific Paper 5.4 Regularization \n", - "28 Scientific Report 3.1 Experimental Setup \n", - "32 Scientific Report 5 Symbolic Reasoning \n", - "34 Scientific Report 3.2 Results \n", - "37 Board Game CARD AND TILE EFFECTS \n", - "41 Board Game CHAPTER OVERVIEW \n", - "42 Board Game CARD AND TILE COSTS \n", - "45 Board Game CARD AND TILE EFFECTS \n", - "55 Board Game EXPEDITION PHASE \n", - "56 Board Game EXPEDITION PHASE \n", - "62 Board Game GAME END \n", - "68 Techincal Documentation 5.2. Thread Hierarchy \n", - "90 Scientific Report 2.1 Pre-training Data \n", - "94 Scientific Report 3 Main results \n", - "98 Regulation Limitations of generative AI and LLMs \n", - "100 Regulation Procurement in an emerging market \n", - "\n", - " question answer \\\n", - "2 How many layers compose the decoder? 6 \n", - "10 What was the dropout rate used for the base mo... 0.1 \n", - "28 How many large language models were evaluated? 5 \n", - "32 Which symbolic reasoning task is used as an ou... A \n", - "34 How many random samples were examined to under... 100 \n", - "37 How many different races are there? 6 \n", - "41 After taking a landmark tile, do you reveal a ... NO \n", - "42 Can a player pay coins to compensate for missi... YES \n", - "45 Which type of cards provide coins? -A: Gray -B... B \n", - "55 Do you need a fish to conquer a distant island? YES \n", - "56 How many victory points you get from each conq... 1 \n", - "62 If player 1 has 30 Victory points and 4 worker... A \n", - "68 Can you identify a thread with a four-dimensio... NO \n", - "90 How many languages did the Wikipedia data cover? 
20 \n", - "94 Was the model compared against GPT-4? NO \n", - "98 Which of the following is not considered a lim... C \n", - "100 Which of the following is NOT mentioned as a r... C \n", - "\n", - " pred_answer pred_section \n", - "2 N=6 NaN \n", - "10 PDROP= 0.1 NaN \n", - "28 FIVE NaN \n", - "32 I NEED MORE INFO NaN \n", - "34 50 NaN \n", - "37 I NEED MORE INFO NaN \n", - "41 YES NaN \n", - "42 NO NaN \n", - "45 I NEED MORE INFO NaN \n", - "55 NO NaN \n", - "56 I NEED MORE INFO NaN \n", - "62 C NaN \n", - "68 I NEED MORE INFO NaN \n", - "90 8 NaN \n", - "94 I NEED MORE INFO NaN \n", - "98 I NEED MORE INFO NaN \n", - "100 I NEED MORE INFO NaN " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documentsectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.037625.4 RegularizationWhat was the dropout rate used for the base mo...0.1P5 Training
3434https://arxiv.org/pdf/2201.119033.2 ResultsHow many random samples were examined to under...10050D Appendix: Additional Analysis
3737https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSHow many different races are there?624
4040https://github.com/mozilla-ai/structured-qa/re...CHAPTER OVERVIEWHow many goins does a player take when discard...3NOT FOUNDQuest of the Ring
4242https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE COSTSCan a player pay coins to compensate for missi...YESNOSkills
4545https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSWhich type of cards provide coins? -A: Gray -B...BNOT FOUNDQuest of the Ring
4646https://github.com/mozilla-ai/structured-qa/re...CARD AND TILE EFFECTSDuring which chapter the purple cards become a...CB2
4747https://github.com/mozilla-ai/structured-qa/re...CONQUERING MIDDLE-EARTHIf you place or move an unit and an enemy fort...NOYES2
5050https://github.com/mozilla-ai/structured-qa/re...LOOKOUT PHASEWhat is the maximum number of cards a player m...4NOT FOUND11
5454https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASECan players conquer and pillage the same islan...NOYESEXPEDITION PHASE
5555https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEDo you need a fish to conquer a distant island?YESNOACTION PHASE EXPEDITION PHASE
5656https://github.com/mozilla-ai/structured-qa/re...EXPEDITION PHASEHow many victory points you get from each conq...1NOT FOUND10
5858https://github.com/mozilla-ai/structured-qa/re...LOCATION ABILITIESHow many victory points are granted by a built...1NOT FOUNDA PLAYER HAS TO:
6262https://github.com/mozilla-ai/structured-qa/re...GAME ENDIf player 1 has 30 Victory points and 4 worker...ACCLEANUP PHASE GAME END
6666https://commission.europa.eu/document/download...1.2.1. Internal partitions and doorsWhat fire resistance must vertical partitions ...ANOT FOUND- Car park levels and ramps: h ≥ 2.2 m (h ≥ 2....
6767https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...5.2. Thread HierarchyWhat is the maximum number of threads within a...1024NOT FOUND10.1. Function Execution Space Specifiers
7171https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...6.1.1. Compilation WorkflowWhat is the primary purpose of just-in-time (J...CAChapter 3. A Scalable Programming Model
7878https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...23.1. What is Lazy Loading?Can you enable lazy loading by setting the env...NOYESChapter 21. CUDA Environment Variables
8686https://aiindex.stanford.edu/wp-content/upload...U.S. RegulationWhich of the following was identified as a hig...BYES7.4 AI Regulation
100100https://assets.publishing.service.gov.uk/media...Procurement in an emerging marketWhich of the following is NOT mentioned as a r...CNOT FOUNDBuying generative AI
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "34 34 https://arxiv.org/pdf/2201.11903 \n", + "37 37 https://github.com/mozilla-ai/structured-qa/re... \n", + "40 40 https://github.com/mozilla-ai/structured-qa/re... \n", + "42 42 https://github.com/mozilla-ai/structured-qa/re... \n", + "45 45 https://github.com/mozilla-ai/structured-qa/re... \n", + "46 46 https://github.com/mozilla-ai/structured-qa/re... \n", + "47 47 https://github.com/mozilla-ai/structured-qa/re... \n", + "50 50 https://github.com/mozilla-ai/structured-qa/re... \n", + "54 54 https://github.com/mozilla-ai/structured-qa/re... \n", + "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", + "56 56 https://github.com/mozilla-ai/structured-qa/re... \n", + "58 58 https://github.com/mozilla-ai/structured-qa/re... \n", + "62 62 https://github.com/mozilla-ai/structured-qa/re... \n", + "66 66 https://commission.europa.eu/document/download... \n", + "67 67 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "71 71 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "86 86 https://aiindex.stanford.edu/wp-content/upload... \n", + "100 100 https://assets.publishing.service.gov.uk/media... \n", + "\n", + " section \\\n", + "10 5.4 Regularization \n", + "34 3.2 Results \n", + "37 CARD AND TILE EFFECTS \n", + "40 CHAPTER OVERVIEW \n", + "42 CARD AND TILE COSTS \n", + "45 CARD AND TILE EFFECTS \n", + "46 CARD AND TILE EFFECTS \n", + "47 CONQUERING MIDDLE-EARTH \n", + "50 LOOKOUT PHASE \n", + "54 EXPEDITION PHASE \n", + "55 EXPEDITION PHASE \n", + "56 EXPEDITION PHASE \n", + "58 LOCATION ABILITIES \n", + "62 GAME END \n", + "66 1.2.1. Internal partitions and doors \n", + "67 5.2. Thread Hierarchy \n", + "71 6.1.1. Compilation Workflow \n", + "78 23.1. What is Lazy Loading? \n", + "86 U.S. 
Regulation \n", + "100 Procurement in an emerging market \n", + "\n", + " question answer pred_answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 P \n", + "34 How many random samples were examined to under... 100 50 \n", + "37 How many different races are there? 6 2 \n", + "40 How many goins does a player take when discard... 3 NOT FOUND \n", + "42 Can a player pay coins to compensate for missi... YES NO \n", + "45 Which type of cards provide coins? -A: Gray -B... B NOT FOUND \n", + "46 During which chapter the purple cards become a... C B \n", + "47 If you place or move an unit and an enemy fort... NO YES \n", + "50 What is the maximum number of cards a player m... 4 NOT FOUND \n", + "54 Can players conquer and pillage the same islan... NO YES \n", + "55 Do you need a fish to conquer a distant island? YES NO \n", + "56 How many victory points you get from each conq... 1 NOT FOUND \n", + "58 How many victory points are granted by a built... 1 NOT FOUND \n", + "62 If player 1 has 30 Victory points and 4 worker... A C \n", + "66 What fire resistance must vertical partitions ... A NOT FOUND \n", + "67 What is the maximum number of threads within a... 1024 NOT FOUND \n", + "71 What is the primary purpose of just-in-time (J... C A \n", + "78 Can you enable lazy loading by setting the env... NO YES \n", + "86 Which of the following was identified as a hig... B YES \n", + "100 Which of the following is NOT mentioned as a r... C NOT FOUND \n", + "\n", + " pred_section \n", + "10 5 Training \n", + "34 D Appendix: Additional Analysis \n", + "37 4 \n", + "40 Quest of the Ring \n", + "42 Skills \n", + "45 Quest of the Ring \n", + "46 2 \n", + "47 2 \n", + "50 11 \n", + "54 EXPEDITION PHASE \n", + "55 ACTION PHASE EXPEDITION PHASE \n", + "56 10 \n", + "58 A PLAYER HAS TO: \n", + "62 CLEANUP PHASE GAME END \n", + "66 - Car park levels and ramps: h ≥ 2.2 m (h ≥ 2.... \n", + "67 10.1. Function Execution Space Specifiers \n", + "71 Chapter 3. 
A Scalable Programming Model \n", + "78 Chapter 21. CUDA Environment Variables \n", + "86 7.4 AI Regulation \n", + "100 Buying generative AI " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " if result[\"pred_answer\"].startswith(\n", + " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", + " ):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AhenESELHEgz", - "outputId": "7e08929e-1342-4705-b1b4-e8cac81c35a6" + "outputId": "694d92fe-6dba-4498-c31d-01c9b8da43ea" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.8058252427184466" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", "accuracy" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XoDo3y-ENVs4" + }, + "outputs": [], + "source": [] } ], "metadata": { @@ -349,28 +902,22 @@ }, "widgets": { "application/vnd.jupyter.widget-state+json": { - "082c72195b2b4a13b0ca7cecf368cebf": { + "23eee73375424fac898a9f3d6db4d7ed": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", - "model_name": "HTMLModel", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - 
"layout": "IPY_MODEL_96da7d2d05834aa98fe41ad43e5c2c62", - "placeholder": "​", - "style": "IPY_MODEL_e2ae5284ec9349a4a920585c12419e33", - "value": " 8.10G/8.10G [05:55<00:00, 22.8MB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "575b8c7136f846c9ab62e7a796350506": { + "4a58b579deb14018b7fcbb0f0d34f5d3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", @@ -386,7 +933,28 @@ "description_width": "" } }, - "5d98e9f3ef544194b6a99d94c1b56cd2": { + "711405db75514c89ac59e5cde61e0708": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7fb454a8ab3b41539f126d1ef073bc7f", + "placeholder": "​", + "style": "IPY_MODEL_bd44de6636874516b29d6d7ac984a17b", + "value": " 8.10G/8.10G [03:12<00:00, 41.4MB/s]" + } + }, + "72b9204d44fe44baa5be314dda1dc592": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", @@ -438,44 +1006,7 @@ "width": null } }, - "79ed82485b234525976e17fc9ebe47de": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c35c62230dfe4c32b2c45632e691cd46", - 
"IPY_MODEL_cfeae74e2e3f44e9b2e2dbbb9c756bd8", - "IPY_MODEL_082c72195b2b4a13b0ca7cecf368cebf" - ], - "layout": "IPY_MODEL_b09e7d3ff8cd498aa45faca3b390ac70" - } - }, - "83e32e91aff04efda8ca1efad07249cb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "96da7d2d05834aa98fe41ad43e5c2c62": { + "7fb454a8ab3b41539f126d1ef073bc7f": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", @@ -527,7 +1058,52 @@ "width": null } }, - "b09e7d3ff8cd498aa45faca3b390ac70": { + "95226231c7294cbf8d568bfcd21946d1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e16789799af9498f96c7f17f01c78f88", + "max": 8098525888, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4a58b579deb14018b7fcbb0f0d34f5d3", + "value": 8098525888 + } + }, + "9e1010b2f9ca4547afc980abee0f2446": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a90a43d4f2cf440991471978882e756e", + "placeholder": "​", + "style": "IPY_MODEL_23eee73375424fac898a9f3d6db4d7ed", + "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" + } + }, + "a90a43d4f2cf440991471978882e756e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", @@ -579,52 +1155,44 @@ "width": null } }, - "c35c62230dfe4c32b2c45632e691cd46": { + "bd44de6636874516b29d6d7ac984a17b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", - "model_name": "HTMLModel", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5d98e9f3ef544194b6a99d94c1b56cd2", - "placeholder": "​", - "style": "IPY_MODEL_83e32e91aff04efda8ca1efad07249cb", - "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "cfeae74e2e3f44e9b2e2dbbb9c756bd8": { + "e147d48976ee49dc8a7770ad5422df63": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": 
"success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d320191f9d7146f8b8ccd10e7ad6dd6d", - "max": 8098525888, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_575b8c7136f846c9ab62e7a796350506", - "value": 8098525888 + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9e1010b2f9ca4547afc980abee0f2446", + "IPY_MODEL_95226231c7294cbf8d568bfcd21946d1", + "IPY_MODEL_711405db75514c89ac59e5cde61e0708" + ], + "layout": "IPY_MODEL_72b9204d44fe44baa5be314dda1dc592" } }, - "d320191f9d7146f8b8ccd10e7ad6dd6d": { + "e16789799af9498f96c7f17f01c78f88": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", @@ -675,21 +1243,6 @@ "visibility": null, "width": null } - }, - "e2ae5284ec9349a4a920585c12419e33": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } } } } diff --git a/benchmark/qwen_2_5_7B_full_context.ipynb b/benchmark/qwen_2_5_7B_full_context.ipynb index 81dbb27..224d97c 100644 --- a/benchmark/qwen_2_5_7B_full_context.ipynb +++ b/benchmark/qwen_2_5_7B_full_context.ipynb @@ -1,2099 +1,1731 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "9RKWbX7BHEgr" - }, - "source": [ - "# Structured Q&A" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PYuloevCHEgu" - }, - "source": [ - "Source code: https://github.com/mozilla-ai/structured-qa" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pgYAsUQWHEgv" - }, - "source": [ - "Docs: https://mozilla-ai.github.io/structured-qa" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + 
"id": "9RKWbX7BHEgr" + }, + "source": [ + "# Structured Q&A" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PYuloevCHEgu" + }, + "source": [ + "Source code: https://github.com/mozilla-ai/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pgYAsUQWHEgv" + }, + "source": [ + "Docs: https://mozilla-ai.github.io/structured-qa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EbFAX4heHEgv" + }, + "source": [ + "## Installing dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "markdown", - "metadata": { - "id": "EbFAX4heHEgv" - }, - "source": [ - "## Installing dependencies" - ] + "id": "tk9uiWGn81j_", + "outputId": "eb76f95c-48e6-4ead-a16f-03ce4fc53dac" + }, + "outputs": [], + "source": [ + "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "tk9uiWGn81j_", - "outputId": "eb76f95c-48e6-4ead-a16f-03ce4fc53dac", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h" - ] - } - ], - "source": [ - "%pip install --quiet https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" - ] + "id": 
"uBJnKqs_MqBV", + "outputId": "095d2c5d-b860-4af9-c1dc-9dc5d6ac8755" + }, + "outputs": [], + "source": [ + "%pip install --quiet PyPDF2 structured-qa" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "p_hsSGafHEgw", + "outputId": "69a9eff5-626a-44ff-9403-6b7db111e765" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "uBJnKqs_MqBV", - "outputId": "095d2c5d-b860-4af9-c1dc-9dc5d6ac8755" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting PyPDF2\n", - " Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)\n", - "Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)\n", - "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/232.6 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━\u001b[0m \u001b[32m225.3/232.6 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m232.6/232.6 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: PyPDF2\n", - "Successfully installed PyPDF2-3.0.1\n" - ] - } - ], - "source": [ - "%pip install PyPDF2" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "--2025-02-04 18:22:19-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 23304 (23K) [text/plain]\n", + "Saving to: ‘structured_qa.csv’\n", + "\n", + "structured_qa.csv 100%[===================>] 22.76K --.-KB/s in 0.001s \n", + "\n", + "2025-02-04 18:22:20 (30.1 MB/s) - ‘structured_qa.csv’ saved [23304/23304]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/main/benchmark/structured_qa.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0MDfM6cyHEgx" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "5bLJE4U7HEgx" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "y3yUsRDWHEgy" + }, + "outputs": [], + "source": [ + "from loguru import logger" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "d9HBkl8rM5ED" + }, + "outputs": [], + "source": [ + "import PyPDF2\n", + "\n", + "\n", + "def load_pdf(pdf_file: str) -> str | None:\n", + " try:\n", + " pdf_reader = PyPDF2.PdfReader(pdf_file)\n", + " return \"\\n\".join(page.extract_text() for page in pdf_reader.pages)\n", + " except Exception as e:\n", + " logger.exception(e)\n", + " return None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AgpODLeJHEgy" + }, + "source": [ + "## Function to Process a single Document" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "n6d8F7cYHEgy" + }, + "outputs": [], + "source": [ + "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", + "You are a rigorous assistant answering questions.\n", + "You must only answer based on the current information available which is:\n", + "\n", + "```\n", + "{CURRENT_INFO}\n", + "```\n", + "\n", + "If the current information available not enough to answer the question,\n", + "you must return \"I need more info\" srting and nothing else:\n", + 
"\n", + "If the current information is enough to answer, you must return one of the following formats:\n", + "- YES/NO (for boolean questions)\n", + "- Number (for numeric questions)\n", + "- Single letter (for multiple-choice questions)\n", + "\"\"\"\n", + "\n", + "\n", + "def process_document(\n", + " document_file,\n", + " document_data,\n", + " model,\n", + "):\n", + " logger.info(\"Predicting\")\n", + " answers = {}\n", + " sections = {}\n", + " for index, row in document_data.iterrows():\n", + " question = row[\"question\"]\n", + " logger.info(f\"Question: {question}\")\n", + " messages = [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": ANSWER_WITH_TYPE_PROMPT.format(\n", + " CURRENT_INFO=load_pdf(document_file)\n", + " ),\n", + " },\n", + " {\"role\": \"user\", \"content\": question},\n", + " ]\n", + " try:\n", + " answer = model.get_response(messages)\n", + " except Exception:\n", + " answers = {index: \"Out of context\" for index in document_data.index}\n", + " sections = {index: None for index in document_data.index}\n", + " return answers, sections\n", + " logger.info(f\"Answer: {answer}\")\n", + " answers[index] = answer\n", + " sections[index] = None\n", + "\n", + " return answers, sections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GdlWjANdHEgz" + }, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "9zx8nCaZHEgz" + }, + "outputs": [], + "source": [ + "from structured_qa.model_loaders import load_llama_cpp_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 176, + "referenced_widgets": [ + "e529d49f260c4dd095a9025e15c4cedf", + "37fd0a08531d488abf6cf2c4efe77f91", + "cc95fba624e244a4bf86d7f3c44da644", + "78d7b4f191ff46088877d933a11da241", + "77e5b0640b064226a9de17039489c493", + "0cea8125daee456ca58658176f92e380", + "128fa55577b54565a824102717cf7365", + 
"a26ab1d05fe044d099e4a11ecc4d95e3", + "45f5c1e972db4eb2b1a163c85d8b3d6f", + "d7a9754f17764fbba485fa9c1176145b", + "604381fc2e1d4ddda28291ddad36edc4" + ] }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "2HoyF-xbHEgv", - "outputId": "e5f9a061-5117-447d-8e1b-1c5e6dc1b875" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark\n", - " Cloning https://github.com/mozilla-ai/structured-qa.git (to revision 5-add-benchmark) to /tmp/pip-req-build-lus18o4a\n", - " Running command git clone --filter=blob:none --quiet https://github.com/mozilla-ai/structured-qa.git /tmp/pip-req-build-lus18o4a\n", - " Running command git checkout -b 5-add-benchmark --track origin/5-add-benchmark\n", - " Switched to a new branch '5-add-benchmark'\n", - " Branch '5-add-benchmark' set up to track remote branch '5-add-benchmark' from 'origin'.\n", - " Resolved https://github.com/mozilla-ai/structured-qa.git to commit 0ab4688e84181b78317b8433eca7e3aaf70c8a1b\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting fire (from structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading fire-0.7.0.tar.gz (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev113+g0ab4688) (0.27.1)\n", - "Collecting loguru (from structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev113+g0ab4688) (2.10.6)\n", - "Collecting pymupdf4llm (from structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev113+g0ab4688) (6.0.2)\n", - "Collecting rapidfuzz (from structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", - "Collecting streamlit (from structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev113+g0ab4688) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from 
huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev113+g0ab4688) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev113+g0ab4688) (2.27.2)\n", - "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.26.4)\n", - "Requirement already satisfied: pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in 
/usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.10.2)\n", - "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.1.44)\n", - "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (6.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (4.23.0)\n", - "Requirement already satisfied: 
narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.24.1)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2.18.0)\n", - "Requirement already 
satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (5.0.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (25.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.17.0)\n", - "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", - "Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m56.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m96.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m102.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m79.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", - " Building wheel for structured-qa (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev113+g0ab4688-py3-none-any.whl size=13247 sha256=cfb9eb8bc7d4151e7d2e0e833826ea14066950d513d283142b21f54d7c8a29e4\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-zgr6pzyi/wheels/be/a2/66/5bd06ba07afee632d178971d710ae5150fe6379c43e361cd32\n", - " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=a21cd75c3bacefec755e81b0472785127c6443c8591f163b12efa5653e33b60e\n", - " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", - "Successfully built structured-qa fire\n", - "Installing collected packages: watchdog, rapidfuzz, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", - "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 rapidfuzz-3.12.1 streamlit-1.41.1 structured-qa-0.3.3.dev113+g0ab4688 watchdog-6.0.0\n" - ] - } - ], - "source": [ - "%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark" - ] + "id": "U4R84hHRHEgz", + "outputId": "828f5651-efa9-412a-e371-973753323984" + }, + "outputs": [], + "source": [ + "model = load_llama_cpp_model(\n", + " \"bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BEzqJJ1yHEgz" + }, + "source": [ + "# Run Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "lJs7zN4N8vhO" - }, - "outputs": [], - "source": [] + "id": "-qtPf9RmHEgz", + "outputId": "661da210-678d-4538-8f83-243f44217742" + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "\n", + "logger.info(\"Loading input data\")\n", + "data = pd.read_csv(\"structured_qa.csv\")\n", + "data[\"pred_answer\"] = [None] * len(data)\n", + "data[\"pred_section\"] = [None] * len(data)\n", + "\n", + "for document_link, document_data in data.groupby(\"document\"):\n", + " logger.info(f\"Downloading document {document_link}\")\n", + " downloaded_document = 
Path(f\"{Path(document_link).name}.pdf\")\n", + " if not Path(downloaded_document).exists():\n", + " urlretrieve(document_link, downloaded_document)\n", + " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", + " else:\n", + " logger.info(f\"File {downloaded_document} already exists\")\n", + "\n", + " answers, sections = process_document(downloaded_document, document_data, model)\n", + "\n", + " for index in document_data.index:\n", + " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", + " data.loc[index, \"pred_section\"] = sections[index]\n", + "\n", + "data.to_csv(\"results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, + "id": "3eW9TIKjHEgz", + "outputId": "344909f7-1fac-4810-f56f-3fd22452973f" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "p_hsSGafHEgw", - "outputId": "69a9eff5-626a-44ff-9403-6b7db111e765" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 48,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24,\n \"min\": 10,\n \"max\": 100,\n \"num_unique_values\": 48,\n \"samples\": [\n 72,\n 85,\n 71\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2302.13971\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Scientific Report\",\n 
\"Regulation\",\n \"Techincal Documentation\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 29,\n \"samples\": [\n \"Limitations of generative AI and LLMs\",\n \"5.2. Thread Hierarchy\",\n \"2.1. Toilets\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 48,\n \"samples\": [\n \"What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\",\n \"How many AI-related regulations were enacted in the United States in 2023?\",\n \"What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 13,\n \"samples\": [\n \"25\",\n \"C\",\n \"0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"NO\",\n \"5\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2025-02-04 18:22:19-- https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.110.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 23304 (23K) [text/plain]\n", - "Saving to: ‘structured_qa.csv’\n", - "\n", - "structured_qa.csv 100%[===================>] 22.76K --.-KB/s in 0.001s \n", - "\n", - "2025-02-04 18:22:20 (30.1 MB/s) - ‘structured_qa.csv’ saved [23304/23304]\n", - "\n" - ] - } + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0documenttypesectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.03762Scientific Paper5.4 RegularizationWhat was the dropout rate used for the base mo...0.1YESNaN
1717https://arxiv.org/pdf/2106.09685.pdfScientific Report4 OUR METHODDoes LoRA work with any neural network contain...YESNONaN
2222https://authorsalliance.org/wp-content/uploads...Techincal DocumentationHOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER?how many peer-reviewed open access journals ar...ABNaN
2424https://authorsalliance.org/wp-content/uploads...Techincal DocumentationOVERCOMING RESERVATIONS ABOUT OPEN ACCESSAre publication fees required for all open acc...NOI NEED MORE INFONaN
2727https://arxiv.org/pdf/2201.11903Scientific Report3 Arithmetic ReasoningIs Arithmetic reasoning is a task that languag...NOOUT OF CONTEXTNaN
2828https://arxiv.org/pdf/2201.11903Scientific Report3.1 Experimental SetupHow many large language models were evaluated?5OUT OF CONTEXTNaN
2929https://arxiv.org/pdf/2201.11903Scientific Report3.1 Experimental SetupHow many benchmarks were used to evaluate arit...5OUT OF CONTEXTNaN
3030https://arxiv.org/pdf/2201.11903Scientific Report5 Symbolic ReasoningIs symbolic reasoning usually simple for human...YESOUT OF CONTEXTNaN
3131https://arxiv.org/pdf/2201.11903Scientific Report5 Symbolic ReasoningHow many words have the example names that the...BOUT OF CONTEXTNaN
3232https://arxiv.org/pdf/2201.11903Scientific Report5 Symbolic ReasoningWhich symbolic reasoning task is used as an ou...AOUT OF CONTEXTNaN
3333https://arxiv.org/pdf/2201.11903Scientific Report3.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...3OUT OF CONTEXTNaN
3434https://arxiv.org/pdf/2201.11903Scientific Report3.2 ResultsHow many random samples were examined to under...100OUT OF CONTEXTNaN
3737https://github.com/mozilla-ai/structured-qa/re...Board GameCARD AND TILE EFFECTSHow many different races are there?65NaN
3939https://github.com/mozilla-ai/structured-qa/re...Board GameCHAPTER OVERVIEWCan you take a Chapter card and a Landmark til...NOI NEED MORE INFONaN
4040https://github.com/mozilla-ai/structured-qa/re...Board GameCHAPTER OVERVIEWHow many goins does a player take when discard...3I NEED MORE INFONaN
4141https://github.com/mozilla-ai/structured-qa/re...Board GameCHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
4444https://github.com/mozilla-ai/structured-qa/re...Board GameCARD AND TILE EFFECTSCan you use a symbol more than once per turn?NOYESNaN
6262https://github.com/mozilla-ai/structured-qa/re...Board GameGAME ENDIf player 1 has 30 Victory points and 4 worker...ACNaN
6363https://commission.europa.eu/document/download...Regulation2.1. ToiletsWhich type of water must be supplied in a toil...BOUT OF CONTEXTNaN
6464https://commission.europa.eu/document/download...RegulationCARBON MONOXIDE DETECTION AND VENTINGIn which type of parkings must a carbon monoxi...COUT OF CONTEXTNaN
6565https://commission.europa.eu/document/download...Regulation4.1. Natural lightingWhat percentage is the daylight factor require...AOUT OF CONTEXTNaN
6666https://commission.europa.eu/document/download...Regulation1.2.1. Internal partitions and doorsWhat fire resistance must vertical partitions ...AOUT OF CONTEXTNaN
6767https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation5.2. Thread HierarchyWhat is the maximum number of threads within a...1024OUT OF CONTEXTNaN
6868https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation5.2. Thread HierarchyCan you identify a thread with a four-dimensio...NOOUT OF CONTEXTNaN
6969https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation6.1.1. Compilation WorkflowIn the offline compilation process using nvcc,...BOUT OF CONTEXTNaN
7070https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation6.1.1. Compilation WorkflowWhat are the two ways the host code can be out...BOUT OF CONTEXTNaN
7171https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation6.1.1. Compilation WorkflowWhat is the primary purpose of just-in-time (J...COUT OF CONTEXTNaN
7272https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation6.1.1. Compilation WorkflowWhat happens to the compiled binary code after...AOUT OF CONTEXTNaN
7373https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation15.3. API FundamentalsWhen are virtual addresses assigned to graph a...COUT OF CONTEXTNaN
7474https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation15.3. API FundamentalsWhat do graph memory nodes represent in a CUDA...AOUT OF CONTEXTNaN
7575https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation15.3. API FundamentalsWhen does a graph allocation's lifetime end? -...BOUT OF CONTEXTNaN
7676https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation15.3. API FundamentalsHow must operations accessing graph memory be ...COUT OF CONTEXTNaN
7777https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation23.1. What is Lazy Loading?What is the primary benefit of Lazy Loading? -...AOUT OF CONTEXTNaN
7878https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation23.1. What is Lazy Loading?Can you enable lazy loading by setting the env...NOOUT OF CONTEXTNaN
7979https://aiindex.stanford.edu/wp-content/upload...Scientific ReportRisk Perceptionwhich type of risk was identified as the leadi...BOUT OF CONTEXTNaN
8080https://aiindex.stanford.edu/wp-content/upload...Scientific ReportRisk PerceptionIn which geographical area were fairness risks...COUT OF CONTEXTNaN
8181https://aiindex.stanford.edu/wp-content/upload...Scientific ReportTraining CostWhat is a major consequence of the rising trai...AOUT OF CONTEXTNaN
8282https://aiindex.stanford.edu/wp-content/upload...Scientific ReportTraining CostHow the AI Index and Epoch AI estimated traini...COUT OF CONTEXTNaN
8383https://aiindex.stanford.edu/wp-content/upload...Scientific ReportLLM Tokenization Introduces UnfairnessWhat is a major source of inequality in AI rel...AOUT OF CONTEXTNaN
8484https://aiindex.stanford.edu/wp-content/upload...Scientific ReportLLM Tokenization Introduces UnfairnessWhat are the three major inequalities resultin...BOUT OF CONTEXTNaN
8585https://aiindex.stanford.edu/wp-content/upload...Scientific ReportU.S. RegulationHow many AI-related regulations were enacted i...25OUT OF CONTEXTNaN
8686https://aiindex.stanford.edu/wp-content/upload...Scientific ReportU.S. RegulationWhich of the following was identified as a hig...BOUT OF CONTEXTNaN
8787https://aiindex.stanford.edu/wp-content/upload...Scientific ReportEuropeWhich country had the highest proportion of fe...BOUT OF CONTEXTNaN
8888https://aiindex.stanford.edu/wp-content/upload...Scientific ReportEuropeWhich countries reported the smallest proporti...COUT OF CONTEXTNaN
9090https://arxiv.org/pdf/2302.13971Scientific Report2.1 Pre-training DataHow many languages did the Wikipedia data cover?208NaN
9494https://arxiv.org/pdf/2302.13971Scientific Report3 Main resultsWas the model compared against GPT-4?NOI NEED MORE INFONaN
9999https://assets.publishing.service.gov.uk/media...RegulationLimitations of generative AI and LLMsCan LLMs be used as an alternative to visiting...NOI NEED MORE INFONaN
100100https://assets.publishing.service.gov.uk/media...RegulationProcurement in an emerging marketWhich of the following is NOT mentioned as a r...CI NEED MORE INFONaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0MDfM6cyHEgx" - }, - "source": [ - "# Setup" + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "17 17 https://arxiv.org/pdf/2106.09685.pdf \n", + "22 22 https://authorsalliance.org/wp-content/uploads... \n", + "24 24 https://authorsalliance.org/wp-content/uploads... \n", + "27 27 https://arxiv.org/pdf/2201.11903 \n", + "28 28 https://arxiv.org/pdf/2201.11903 \n", + "29 29 https://arxiv.org/pdf/2201.11903 \n", + "30 30 https://arxiv.org/pdf/2201.11903 \n", + "31 31 https://arxiv.org/pdf/2201.11903 \n", + "32 32 https://arxiv.org/pdf/2201.11903 \n", + "33 33 https://arxiv.org/pdf/2201.11903 \n", + "34 34 https://arxiv.org/pdf/2201.11903 \n", + "37 37 https://github.com/mozilla-ai/structured-qa/re... \n", + "39 39 https://github.com/mozilla-ai/structured-qa/re... \n", + "40 40 https://github.com/mozilla-ai/structured-qa/re... \n", + "41 41 https://github.com/mozilla-ai/structured-qa/re... \n", + "44 44 https://github.com/mozilla-ai/structured-qa/re... \n", + "62 62 https://github.com/mozilla-ai/structured-qa/re... \n", + "63 63 https://commission.europa.eu/document/download... \n", + "64 64 https://commission.europa.eu/document/download... \n", + "65 65 https://commission.europa.eu/document/download... \n", + "66 66 https://commission.europa.eu/document/download... \n", + "67 67 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "68 68 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "69 69 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "70 70 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "71 71 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "72 72 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "73 73 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... 
\n", + "74 74 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "75 75 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "76 76 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "77 77 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", + "79 79 https://aiindex.stanford.edu/wp-content/upload... \n", + "80 80 https://aiindex.stanford.edu/wp-content/upload... \n", + "81 81 https://aiindex.stanford.edu/wp-content/upload... \n", + "82 82 https://aiindex.stanford.edu/wp-content/upload... \n", + "83 83 https://aiindex.stanford.edu/wp-content/upload... \n", + "84 84 https://aiindex.stanford.edu/wp-content/upload... \n", + "85 85 https://aiindex.stanford.edu/wp-content/upload... \n", + "86 86 https://aiindex.stanford.edu/wp-content/upload... \n", + "87 87 https://aiindex.stanford.edu/wp-content/upload... \n", + "88 88 https://aiindex.stanford.edu/wp-content/upload... \n", + "90 90 https://arxiv.org/pdf/2302.13971 \n", + "94 94 https://arxiv.org/pdf/2302.13971 \n", + "99 99 https://assets.publishing.service.gov.uk/media... \n", + "100 100 https://assets.publishing.service.gov.uk/media... \n", + "\n", + " type section \\\n", + "10 Scientific Paper 5.4 Regularization \n", + "17 Scientific Report 4 OUR METHOD \n", + "22 Techincal Documentation HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER? 
\n", + "24 Techincal Documentation OVERCOMING RESERVATIONS ABOUT OPEN ACCESS \n", + "27 Scientific Report 3 Arithmetic Reasoning \n", + "28 Scientific Report 3.1 Experimental Setup \n", + "29 Scientific Report 3.1 Experimental Setup \n", + "30 Scientific Report 5 Symbolic Reasoning \n", + "31 Scientific Report 5 Symbolic Reasoning \n", + "32 Scientific Report 5 Symbolic Reasoning \n", + "33 Scientific Report 3.4 Robustness of Chain of Thought \n", + "34 Scientific Report 3.2 Results \n", + "37 Board Game CARD AND TILE EFFECTS \n", + "39 Board Game CHAPTER OVERVIEW \n", + "40 Board Game CHAPTER OVERVIEW \n", + "41 Board Game CHAPTER OVERVIEW \n", + "44 Board Game CARD AND TILE EFFECTS \n", + "62 Board Game GAME END \n", + "63 Regulation 2.1. Toilets \n", + "64 Regulation CARBON MONOXIDE DETECTION AND VENTING \n", + "65 Regulation 4.1. Natural lighting \n", + "66 Regulation 1.2.1. Internal partitions and doors \n", + "67 Techincal Documentation 5.2. Thread Hierarchy \n", + "68 Techincal Documentation 5.2. Thread Hierarchy \n", + "69 Techincal Documentation 6.1.1. Compilation Workflow \n", + "70 Techincal Documentation 6.1.1. Compilation Workflow \n", + "71 Techincal Documentation 6.1.1. Compilation Workflow \n", + "72 Techincal Documentation 6.1.1. Compilation Workflow \n", + "73 Techincal Documentation 15.3. API Fundamentals \n", + "74 Techincal Documentation 15.3. API Fundamentals \n", + "75 Techincal Documentation 15.3. API Fundamentals \n", + "76 Techincal Documentation 15.3. API Fundamentals \n", + "77 Techincal Documentation 23.1. What is Lazy Loading? \n", + "78 Techincal Documentation 23.1. What is Lazy Loading? \n", + "79 Scientific Report Risk Perception \n", + "80 Scientific Report Risk Perception \n", + "81 Scientific Report Training Cost \n", + "82 Scientific Report Training Cost \n", + "83 Scientific Report LLM Tokenization Introduces Unfairness \n", + "84 Scientific Report LLM Tokenization Introduces Unfairness \n", + "85 Scientific Report U.S. 
Regulation \n", + "86 Scientific Report U.S. Regulation \n", + "87 Scientific Report Europe \n", + "88 Scientific Report Europe \n", + "90 Scientific Report 2.1 Pre-training Data \n", + "94 Scientific Report 3 Main results \n", + "99 Regulation Limitations of generative AI and LLMs \n", + "100 Regulation Procurement in an emerging market \n", + "\n", + " question answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 \n", + "17 Does LoRA work with any neural network contain... YES \n", + "22 how many peer-reviewed open access journals ar... A \n", + "24 Are publication fees required for all open acc... NO \n", + "27 Is Arithmetic reasoning is a task that languag... NO \n", + "28 How many large language models were evaluated? 5 \n", + "29 How many benchmarks were used to evaluate arit... 5 \n", + "30 Is symbolic reasoning usually simple for human... YES \n", + "31 How many words have the example names that the... B \n", + "32 Which symbolic reasoning task is used as an ou... A \n", + "33 How many annotators provided independent chain... 3 \n", + "34 How many random samples were examined to under... 100 \n", + "37 How many different races are there? 6 \n", + "39 Can you take a Chapter card and a Landmark til... NO \n", + "40 How many goins does a player take when discard... 3 \n", + "41 After taking a landmark tile, do you reveal a ... NO \n", + "44 Can you use a symbol more than once per turn? NO \n", + "62 If player 1 has 30 Victory points and 4 worker... A \n", + "63 Which type of water must be supplied in a toil... B \n", + "64 In which type of parkings must a carbon monoxi... C \n", + "65 What percentage is the daylight factor require... A \n", + "66 What fire resistance must vertical partitions ... A \n", + "67 What is the maximum number of threads within a... 1024 \n", + "68 Can you identify a thread with a four-dimensio... NO \n", + "69 In the offline compilation process using nvcc,... 
B \n", + "70 What are the two ways the host code can be out... B \n", + "71 What is the primary purpose of just-in-time (J... C \n", + "72 What happens to the compiled binary code after... A \n", + "73 When are virtual addresses assigned to graph a... C \n", + "74 What do graph memory nodes represent in a CUDA... A \n", + "75 When does a graph allocation's lifetime end? -... B \n", + "76 How must operations accessing graph memory be ... C \n", + "77 What is the primary benefit of Lazy Loading? -... A \n", + "78 Can you enable lazy loading by setting the env... NO \n", + "79 which type of risk was identified as the leadi... B \n", + "80 In which geographical area were fairness risks... C \n", + "81 What is a major consequence of the rising trai... A \n", + "82 How the AI Index and Epoch AI estimated traini... C \n", + "83 What is a major source of inequality in AI rel... A \n", + "84 What are the three major inequalities resultin... B \n", + "85 How many AI-related regulations were enacted i... 25 \n", + "86 Which of the following was identified as a hig... B \n", + "87 Which country had the highest proportion of fe... B \n", + "88 Which countries reported the smallest proporti... C \n", + "90 How many languages did the Wikipedia data cover? 20 \n", + "94 Was the model compared against GPT-4? NO \n", + "99 Can LLMs be used as an alternative to visiting... NO \n", + "100 Which of the following is NOT mentioned as a r... 
C \n", + "\n", + " pred_answer pred_section \n", + "10 YES NaN \n", + "17 NO NaN \n", + "22 B NaN \n", + "24 I NEED MORE INFO NaN \n", + "27 OUT OF CONTEXT NaN \n", + "28 OUT OF CONTEXT NaN \n", + "29 OUT OF CONTEXT NaN \n", + "30 OUT OF CONTEXT NaN \n", + "31 OUT OF CONTEXT NaN \n", + "32 OUT OF CONTEXT NaN \n", + "33 OUT OF CONTEXT NaN \n", + "34 OUT OF CONTEXT NaN \n", + "37 5 NaN \n", + "39 I NEED MORE INFO NaN \n", + "40 I NEED MORE INFO NaN \n", + "41 YES NaN \n", + "44 YES NaN \n", + "62 C NaN \n", + "63 OUT OF CONTEXT NaN \n", + "64 OUT OF CONTEXT NaN \n", + "65 OUT OF CONTEXT NaN \n", + "66 OUT OF CONTEXT NaN \n", + "67 OUT OF CONTEXT NaN \n", + "68 OUT OF CONTEXT NaN \n", + "69 OUT OF CONTEXT NaN \n", + "70 OUT OF CONTEXT NaN \n", + "71 OUT OF CONTEXT NaN \n", + "72 OUT OF CONTEXT NaN \n", + "73 OUT OF CONTEXT NaN \n", + "74 OUT OF CONTEXT NaN \n", + "75 OUT OF CONTEXT NaN \n", + "76 OUT OF CONTEXT NaN \n", + "77 OUT OF CONTEXT NaN \n", + "78 OUT OF CONTEXT NaN \n", + "79 OUT OF CONTEXT NaN \n", + "80 OUT OF CONTEXT NaN \n", + "81 OUT OF CONTEXT NaN \n", + "82 OUT OF CONTEXT NaN \n", + "83 OUT OF CONTEXT NaN \n", + "84 OUT OF CONTEXT NaN \n", + "85 OUT OF CONTEXT NaN \n", + "86 OUT OF CONTEXT NaN \n", + "87 OUT OF CONTEXT NaN \n", + "88 OUT OF CONTEXT NaN \n", + "90 8 NaN \n", + "94 I NEED MORE INFO NaN \n", + "99 I NEED MORE INFO NaN \n", + "100 I NEED MORE INFO NaN " ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "for index, result in results.iterrows():\n", + " if result[\"pred_answer\"].startswith(\n", + " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", + " ):\n", + " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", + "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": 
"AhenESELHEgz", + "outputId": "e64b31e6-8ad5-4dfe-939b-5e6ace8abf5b" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "5bLJE4U7HEgx" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"LOGURU_LEVEL\"] = \"INFO\"" + "data": { + "text/plain": [ + "0.5339805825242718" ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CD7lcTDjSM7T" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "0cea8125daee456ca58658176f92e380": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "y3yUsRDWHEgy" - }, - "outputs": [], - "source": [ - "from loguru import logger" - ] + "128fa55577b54565a824102717cf7365": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "d9HBkl8rM5ED" - }, - "outputs": [], - "source": [ - "import PyPDF2\n", - "\n", - "\n", - "def load_pdf(pdf_file: str) -> str | None:\n", - " try:\n", - " pdf_reader = PyPDF2.PdfReader(pdf_file)\n", - " return \"\\n\".join(page.extract_text() for page in pdf_reader.pages)\n", - " except Exception as e:\n", - " logger.exception(e)\n", - " return None" - ] + "37fd0a08531d488abf6cf2c4efe77f91": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0cea8125daee456ca58658176f92e380", + "placeholder": "​", + "style": "IPY_MODEL_128fa55577b54565a824102717cf7365", + "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" + } }, - { - "cell_type": 
"markdown", - "metadata": { - "id": "AgpODLeJHEgy" - }, - "source": [ - "## Function to Process a single Document" - ] + "45f5c1e972db4eb2b1a163c85d8b3d6f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "n6d8F7cYHEgy" - }, - "outputs": [], - "source": [ - "ANSWER_WITH_TYPE_PROMPT = \"\"\"\n", - "You are a rigorous assistant answering questions.\n", - "You must only answer based on the current information available which is:\n", - "\n", - "```\n", - "{CURRENT_INFO}\n", - "```\n", - "\n", - "If the current information available not enough to answer the question,\n", - "you must return \"I need more info\" srting and nothing else:\n", - "\n", - "If the current information is enough to answer, you must return one of the following formats:\n", - "- YES/NO (for boolean questions)\n", - "- Number (for numeric questions)\n", - "- Single letter (for multiple-choice questions)\n", - "\"\"\"\n", - "\n", - "\n", - "def process_document(\n", - " document_file,\n", - " document_data,\n", - " model,\n", - "):\n", - " logger.info(\"Predicting\")\n", - " answers = {}\n", - " sections = {}\n", - " for index, row in document_data.iterrows():\n", - " question = row[\"question\"]\n", - " logger.info(f\"Question: {question}\")\n", - " messages = [\n", - " {\n", - " \"role\": \"system\",\n", - " \"content\": ANSWER_WITH_TYPE_PROMPT.format(\n", - " CURRENT_INFO=load_pdf(document_file)\n", - " ),\n", - " },\n", - " {\"role\": \"user\", \"content\": question},\n", - " ]\n", - " try:\n", - " answer = 
model.get_response(messages)\n", - " except Exception as e:\n", - " answers = {\n", - " index: \"Out of context\" for index in document_data.index\n", - " }\n", - " sections = {index: None for index in document_data.index}\n", - " return answers, sections\n", - " logger.info(f\"Answer: {answer}\")\n", - " answers[index] = answer\n", - " sections[index] = None\n", - "\n", - " return answers, sections" - ] + "604381fc2e1d4ddda28291ddad36edc4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "GdlWjANdHEgz" - }, - "source": [ - "## Load Model" - ] + "77e5b0640b064226a9de17039489c493": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, 
+ "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "9zx8nCaZHEgz" - }, - "outputs": [], - "source": [ - "from structured_qa.model_loaders import load_llama_cpp_model" - ] + "78d7b4f191ff46088877d933a11da241": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d7a9754f17764fbba485fa9c1176145b", + "placeholder": "​", + "style": "IPY_MODEL_604381fc2e1d4ddda28291ddad36edc4", + "value": " 8.10G/8.10G [04:46<00:00, 42.4MB/s]" + } }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 176, - "referenced_widgets": [ - "e529d49f260c4dd095a9025e15c4cedf", - "37fd0a08531d488abf6cf2c4efe77f91", - "cc95fba624e244a4bf86d7f3c44da644", - "78d7b4f191ff46088877d933a11da241", - "77e5b0640b064226a9de17039489c493", - "0cea8125daee456ca58658176f92e380", - "128fa55577b54565a824102717cf7365", - "a26ab1d05fe044d099e4a11ecc4d95e3", - "45f5c1e972db4eb2b1a163c85d8b3d6f", - "d7a9754f17764fbba485fa9c1176145b", - "604381fc2e1d4ddda28291ddad36edc4" - ] - }, - "id": "U4R84hHRHEgz", - "outputId": "828f5651-efa9-412a-e371-973753323984" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your 
Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Qwen2.5-7B-Instruct-Q8_0.gguf: 0%| | 0.00/8.10G [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-02-04 18:29:09.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://aiindex.stanford.edu/wp-content/uploads/2024/05/HAI_AI-Index-Report-2024.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:29:09.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mFile HAI_AI-Index-Report-2024.pdf.pdf already exists\u001b[0m\n", - "\u001b[32m2025-02-04 18:29:09.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:29:09.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. 
-C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-02-04 18:29:49.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/1706.03762\u001b[0m\n", - "\u001b[32m2025-02-04 18:29:49.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/1706.03762 to 1706.03762.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:29:49.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:29:49.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:04.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:04.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:05.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:05.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:07.072\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:07.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:08.178\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:08.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:09.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:09.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional embeddings?\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:11.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:11.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:12.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:12.289\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:13.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B: NVIDIA P100\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:13.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:14.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:14.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:16.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 4000\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:16.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:17.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:17.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2106.09685.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 
18:30:17.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2106.09685.pdf to 2106.09685.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:17.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:30:17.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:32.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:32.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? 
-A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:39.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: -C: 3x\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:39.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:46.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:46.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:52.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:52.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2201.11903\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:53.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2201.11903 to 2201.11903.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:53.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:53.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is Arithmetic 
reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:54.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2210.05189\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:54.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2210.05189 to 2210.05189.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:54.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:31:54.812\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:05.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:05.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:05.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:05.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:06.099\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:06.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:06.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 14\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:06.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural network?\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:06.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:06.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? 
-A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:07.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:07.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://arxiv.org/pdf/2302.13971\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:07.474\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://arxiv.org/pdf/2302.13971 to 2302.13971.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:07.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:32:07.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? 
-A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:22.189\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:22.191\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:23.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:23.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:24.564\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:24.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:26.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 0.1\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:26.186\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:28.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 20\u001b[0m\n", - 
"\u001b[32m2025-02-04 18:33:28.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:29.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:29.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:30.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:30.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:32.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:32.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:33.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:33.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading 
document https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:34.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://assets.publishing.service.gov.uk/media/65c3b5d628a4a00012d2ba5c/6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf to 6.8558_CO_Generative_AI_Framework_Report_v7_WEB.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:34.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:33:34.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? -A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:04.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:04.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:07.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:07.851\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? 
-A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:10.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:10.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:13.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:13.169\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:15.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:15.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:16.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://authorsalliance.org/wp-content/uploads/Documents/Guides/Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf to Authors%20Alliance%20-%20Understanding%20Open%20Access.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:16.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:35:16.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:24.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:24.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? -A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:25.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:25.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:27.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:27.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:28.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:28.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access 
policy?\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:30.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 2015\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:30.191\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:32.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:32.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:33.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://commission.europa.eu/document/download/1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf to 1654ca52-ec72-4bae-ba40-d2fc0f3d71ae_en?filename=mit-1-performance-and-technical-performance-specification-v1-2_en.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:33.053\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:33.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? 
-A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:37.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:38.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf to CUDA_C_Programming_Guide.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:38.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:38.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:54.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:54.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/7DUME_EN01_Rules.pdf to 7DUME_EN01_Rules.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:54.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:54.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many chapters does 
the game last?\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:58.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:58.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:58.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:58.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:59.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:59.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:59.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:36:59.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:00.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:00.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:01.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:01.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:01.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:01.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:01.924\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:01.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the reserve?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:02.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:02.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:02.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:02.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? -A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:03.327\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:03.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? 
-A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:03.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:03.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:04.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:04.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:04.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:04.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:05.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 7\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:05.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m12\u001b[0m - \u001b[1mDownloading document https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:05.874\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mDownloaded https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf to is_eotn_rulebook.pdf.pdf\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:05.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:05.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout phase?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:17.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 4\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:17.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:17.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:18.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:18.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:18.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: 
How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:19.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:19.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:21.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:21.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:21.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:21.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:22.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:22.707\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:23.455\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:23.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:24.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:24.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:25.033\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:25.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:25.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:25.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:26.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 
18:37:26.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-02-04 18:37:27.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_document\u001b[0m:\u001b[36m47\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "\n", - "logger.info(\"Loading input data\")\n", - "data = pd.read_csv(\"structured_qa.csv\")\n", - "data[\"pred_answer\"] = [None] * len(data)\n", - "data[\"pred_section\"] = [None] * len(data)\n", - "\n", - "for document_link, document_data in data.groupby(\"document\"):\n", - " logger.info(f\"Downloading document {document_link}\")\n", - " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " if not Path(downloaded_document).exists():\n", - " urlretrieve(document_link, downloaded_document)\n", - " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", - " else:\n", - " logger.info(f\"File {downloaded_document} already exists\")\n", - "\n", - " answers, sections = process_document(downloaded_document, document_data, model)\n", - "\n", - " for index in document_data.index:\n", - " data.loc[index, \"pred_answer\"] = str(answers[index]).upper()\n", - " data.loc[index, \"pred_section\"] = sections[index]\n", - "\n", - "data.to_csv(\"results.csv\")" - ] + "d7a9754f17764fbba485fa9c1176145b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "3eW9TIKjHEgz", - "outputId": "344909f7-1fac-4810-f56f-3fd22452973f" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " Unnamed: 0 document \\\n", - "10 10 https://arxiv.org/pdf/1706.03762 \n", - "17 17 https://arxiv.org/pdf/2106.09685.pdf \n", - "22 22 https://authorsalliance.org/wp-content/uploads... \n", - "24 24 https://authorsalliance.org/wp-content/uploads... \n", - "27 27 https://arxiv.org/pdf/2201.11903 \n", - "28 28 https://arxiv.org/pdf/2201.11903 \n", - "29 29 https://arxiv.org/pdf/2201.11903 \n", - "30 30 https://arxiv.org/pdf/2201.11903 \n", - "31 31 https://arxiv.org/pdf/2201.11903 \n", - "32 32 https://arxiv.org/pdf/2201.11903 \n", - "33 33 https://arxiv.org/pdf/2201.11903 \n", - "34 34 https://arxiv.org/pdf/2201.11903 \n", - "37 37 https://github.com/mozilla-ai/structured-qa/re... \n", - "39 39 https://github.com/mozilla-ai/structured-qa/re... 
\n", - "40 40 https://github.com/mozilla-ai/structured-qa/re... \n", - "41 41 https://github.com/mozilla-ai/structured-qa/re... \n", - "44 44 https://github.com/mozilla-ai/structured-qa/re... \n", - "62 62 https://github.com/mozilla-ai/structured-qa/re... \n", - "63 63 https://commission.europa.eu/document/download... \n", - "64 64 https://commission.europa.eu/document/download... \n", - "65 65 https://commission.europa.eu/document/download... \n", - "66 66 https://commission.europa.eu/document/download... \n", - "67 67 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "68 68 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "69 69 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "70 70 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "71 71 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "72 72 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "73 73 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "74 74 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "75 75 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "76 76 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "77 77 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "78 78 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... \n", - "79 79 https://aiindex.stanford.edu/wp-content/upload... \n", - "80 80 https://aiindex.stanford.edu/wp-content/upload... \n", - "81 81 https://aiindex.stanford.edu/wp-content/upload... \n", - "82 82 https://aiindex.stanford.edu/wp-content/upload... \n", - "83 83 https://aiindex.stanford.edu/wp-content/upload... \n", - "84 84 https://aiindex.stanford.edu/wp-content/upload... \n", - "85 85 https://aiindex.stanford.edu/wp-content/upload... \n", - "86 86 https://aiindex.stanford.edu/wp-content/upload... \n", - "87 87 https://aiindex.stanford.edu/wp-content/upload... \n", - "88 88 https://aiindex.stanford.edu/wp-content/upload... 
\n", - "90 90 https://arxiv.org/pdf/2302.13971 \n", - "94 94 https://arxiv.org/pdf/2302.13971 \n", - "99 99 https://assets.publishing.service.gov.uk/media... \n", - "100 100 https://assets.publishing.service.gov.uk/media... \n", - "\n", - " type section \\\n", - "10 Scientific Paper 5.4 Regularization \n", - "17 Scientific Report 4 OUR METHOD \n", - "22 Techincal Documentation HOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER? \n", - "24 Techincal Documentation OVERCOMING RESERVATIONS ABOUT OPEN ACCESS \n", - "27 Scientific Report 3 Arithmetic Reasoning \n", - "28 Scientific Report 3.1 Experimental Setup \n", - "29 Scientific Report 3.1 Experimental Setup \n", - "30 Scientific Report 5 Symbolic Reasoning \n", - "31 Scientific Report 5 Symbolic Reasoning \n", - "32 Scientific Report 5 Symbolic Reasoning \n", - "33 Scientific Report 3.4 Robustness of Chain of Thought \n", - "34 Scientific Report 3.2 Results \n", - "37 Board Game CARD AND TILE EFFECTS \n", - "39 Board Game CHAPTER OVERVIEW \n", - "40 Board Game CHAPTER OVERVIEW \n", - "41 Board Game CHAPTER OVERVIEW \n", - "44 Board Game CARD AND TILE EFFECTS \n", - "62 Board Game GAME END \n", - "63 Regulation 2.1. Toilets \n", - "64 Regulation CARBON MONOXIDE DETECTION AND VENTING \n", - "65 Regulation 4.1. Natural lighting \n", - "66 Regulation 1.2.1. Internal partitions and doors \n", - "67 Techincal Documentation 5.2. Thread Hierarchy \n", - "68 Techincal Documentation 5.2. Thread Hierarchy \n", - "69 Techincal Documentation 6.1.1. Compilation Workflow \n", - "70 Techincal Documentation 6.1.1. Compilation Workflow \n", - "71 Techincal Documentation 6.1.1. Compilation Workflow \n", - "72 Techincal Documentation 6.1.1. Compilation Workflow \n", - "73 Techincal Documentation 15.3. API Fundamentals \n", - "74 Techincal Documentation 15.3. API Fundamentals \n", - "75 Techincal Documentation 15.3. API Fundamentals \n", - "76 Techincal Documentation 15.3. API Fundamentals \n", - "77 Techincal Documentation 23.1. 
What is Lazy Loading? \n", - "78 Techincal Documentation 23.1. What is Lazy Loading? \n", - "79 Scientific Report Risk Perception \n", - "80 Scientific Report Risk Perception \n", - "81 Scientific Report Training Cost \n", - "82 Scientific Report Training Cost \n", - "83 Scientific Report LLM Tokenization Introduces Unfairness \n", - "84 Scientific Report LLM Tokenization Introduces Unfairness \n", - "85 Scientific Report U.S. Regulation \n", - "86 Scientific Report U.S. Regulation \n", - "87 Scientific Report Europe \n", - "88 Scientific Report Europe \n", - "90 Scientific Report 2.1 Pre-training Data \n", - "94 Scientific Report 3 Main results \n", - "99 Regulation Limitations of generative AI and LLMs \n", - "100 Regulation Procurement in an emerging market \n", - "\n", - " question answer \\\n", - "10 What was the dropout rate used for the base mo... 0.1 \n", - "17 Does LoRA work with any neural network contain... YES \n", - "22 how many peer-reviewed open access journals ar... A \n", - "24 Are publication fees required for all open acc... NO \n", - "27 Is Arithmetic reasoning is a task that languag... NO \n", - "28 How many large language models were evaluated? 5 \n", - "29 How many benchmarks were used to evaluate arit... 5 \n", - "30 Is symbolic reasoning usually simple for human... YES \n", - "31 How many words have the example names that the... B \n", - "32 Which symbolic reasoning task is used as an ou... A \n", - "33 How many annotators provided independent chain... 3 \n", - "34 How many random samples were examined to under... 100 \n", - "37 How many different races are there? 6 \n", - "39 Can you take a Chapter card and a Landmark til... NO \n", - "40 How many goins does a player take when discard... 3 \n", - "41 After taking a landmark tile, do you reveal a ... NO \n", - "44 Can you use a symbol more than once per turn? NO \n", - "62 If player 1 has 30 Victory points and 4 worker... A \n", - "63 Which type of water must be supplied in a toil... 
B \n", - "64 In which type of parkings must a carbon monoxi... C \n", - "65 What percentage is the daylight factor require... A \n", - "66 What fire resistance must vertical partitions ... A \n", - "67 What is the maximum number of threads within a... 1024 \n", - "68 Can you identify a thread with a four-dimensio... NO \n", - "69 In the offline compilation process using nvcc,... B \n", - "70 What are the two ways the host code can be out... B \n", - "71 What is the primary purpose of just-in-time (J... C \n", - "72 What happens to the compiled binary code after... A \n", - "73 When are virtual addresses assigned to graph a... C \n", - "74 What do graph memory nodes represent in a CUDA... A \n", - "75 When does a graph allocation's lifetime end? -... B \n", - "76 How must operations accessing graph memory be ... C \n", - "77 What is the primary benefit of Lazy Loading? -... A \n", - "78 Can you enable lazy loading by setting the env... NO \n", - "79 which type of risk was identified as the leadi... B \n", - "80 In which geographical area were fairness risks... C \n", - "81 What is a major consequence of the rising trai... A \n", - "82 How the AI Index and Epoch AI estimated traini... C \n", - "83 What is a major source of inequality in AI rel... A \n", - "84 What are the three major inequalities resultin... B \n", - "85 How many AI-related regulations were enacted i... 25 \n", - "86 Which of the following was identified as a hig... B \n", - "87 Which country had the highest proportion of fe... B \n", - "88 Which countries reported the smallest proporti... C \n", - "90 How many languages did the Wikipedia data cover? 20 \n", - "94 Was the model compared against GPT-4? NO \n", - "99 Can LLMs be used as an alternative to visiting... NO \n", - "100 Which of the following is NOT mentioned as a r... 
C \n", - "\n", - " pred_answer pred_section \n", - "10 YES NaN \n", - "17 NO NaN \n", - "22 B NaN \n", - "24 I NEED MORE INFO NaN \n", - "27 OUT OF CONTEXT NaN \n", - "28 OUT OF CONTEXT NaN \n", - "29 OUT OF CONTEXT NaN \n", - "30 OUT OF CONTEXT NaN \n", - "31 OUT OF CONTEXT NaN \n", - "32 OUT OF CONTEXT NaN \n", - "33 OUT OF CONTEXT NaN \n", - "34 OUT OF CONTEXT NaN \n", - "37 5 NaN \n", - "39 I NEED MORE INFO NaN \n", - "40 I NEED MORE INFO NaN \n", - "41 YES NaN \n", - "44 YES NaN \n", - "62 C NaN \n", - "63 OUT OF CONTEXT NaN \n", - "64 OUT OF CONTEXT NaN \n", - "65 OUT OF CONTEXT NaN \n", - "66 OUT OF CONTEXT NaN \n", - "67 OUT OF CONTEXT NaN \n", - "68 OUT OF CONTEXT NaN \n", - "69 OUT OF CONTEXT NaN \n", - "70 OUT OF CONTEXT NaN \n", - "71 OUT OF CONTEXT NaN \n", - "72 OUT OF CONTEXT NaN \n", - "73 OUT OF CONTEXT NaN \n", - "74 OUT OF CONTEXT NaN \n", - "75 OUT OF CONTEXT NaN \n", - "76 OUT OF CONTEXT NaN \n", - "77 OUT OF CONTEXT NaN \n", - "78 OUT OF CONTEXT NaN \n", - "79 OUT OF CONTEXT NaN \n", - "80 OUT OF CONTEXT NaN \n", - "81 OUT OF CONTEXT NaN \n", - "82 OUT OF CONTEXT NaN \n", - "83 OUT OF CONTEXT NaN \n", - "84 OUT OF CONTEXT NaN \n", - "85 OUT OF CONTEXT NaN \n", - "86 OUT OF CONTEXT NaN \n", - "87 OUT OF CONTEXT NaN \n", - "88 OUT OF CONTEXT NaN \n", - "90 8 NaN \n", - "94 I NEED MORE INFO NaN \n", - "99 I NEED MORE INFO NaN \n", - "100 I NEED MORE INFO NaN " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0documenttypesectionquestionanswerpred_answerpred_section
1010https://arxiv.org/pdf/1706.03762Scientific Paper5.4 RegularizationWhat was the dropout rate used for the base mo...0.1YESNaN
1717https://arxiv.org/pdf/2106.09685.pdfScientific Report4 OUR METHODDoes LoRA work with any neural network contain...YESNONaN
2222https://authorsalliance.org/wp-content/uploads...Techincal DocumentationHOW DO YOU CHOOSE AN OPEN ACCESS PUBLISHER?how many peer-reviewed open access journals ar...ABNaN
2424https://authorsalliance.org/wp-content/uploads...Techincal DocumentationOVERCOMING RESERVATIONS ABOUT OPEN ACCESSAre publication fees required for all open acc...NOI NEED MORE INFONaN
2727https://arxiv.org/pdf/2201.11903Scientific Report3 Arithmetic ReasoningIs Arithmetic reasoning is a task that languag...NOOUT OF CONTEXTNaN
2828https://arxiv.org/pdf/2201.11903Scientific Report3.1 Experimental SetupHow many large language models were evaluated?5OUT OF CONTEXTNaN
2929https://arxiv.org/pdf/2201.11903Scientific Report3.1 Experimental SetupHow many benchmarks were used to evaluate arit...5OUT OF CONTEXTNaN
3030https://arxiv.org/pdf/2201.11903Scientific Report5 Symbolic ReasoningIs symbolic reasoning usually simple for human...YESOUT OF CONTEXTNaN
3131https://arxiv.org/pdf/2201.11903Scientific Report5 Symbolic ReasoningHow many words have the example names that the...BOUT OF CONTEXTNaN
3232https://arxiv.org/pdf/2201.11903Scientific Report5 Symbolic ReasoningWhich symbolic reasoning task is used as an ou...AOUT OF CONTEXTNaN
3333https://arxiv.org/pdf/2201.11903Scientific Report3.4 Robustness of Chain of ThoughtHow many annotators provided independent chain...3OUT OF CONTEXTNaN
3434https://arxiv.org/pdf/2201.11903Scientific Report3.2 ResultsHow many random samples were examined to under...100OUT OF CONTEXTNaN
3737https://github.com/mozilla-ai/structured-qa/re...Board GameCARD AND TILE EFFECTSHow many different races are there?65NaN
3939https://github.com/mozilla-ai/structured-qa/re...Board GameCHAPTER OVERVIEWCan you take a Chapter card and a Landmark til...NOI NEED MORE INFONaN
4040https://github.com/mozilla-ai/structured-qa/re...Board GameCHAPTER OVERVIEWHow many goins does a player take when discard...3I NEED MORE INFONaN
4141https://github.com/mozilla-ai/structured-qa/re...Board GameCHAPTER OVERVIEWAfter taking a landmark tile, do you reveal a ...NOYESNaN
4444https://github.com/mozilla-ai/structured-qa/re...Board GameCARD AND TILE EFFECTSCan you use a symbol more than once per turn?NOYESNaN
6262https://github.com/mozilla-ai/structured-qa/re...Board GameGAME ENDIf player 1 has 30 Victory points and 4 worker...ACNaN
6363https://commission.europa.eu/document/download...Regulation2.1. ToiletsWhich type of water must be supplied in a toil...BOUT OF CONTEXTNaN
6464https://commission.europa.eu/document/download...RegulationCARBON MONOXIDE DETECTION AND VENTINGIn which type of parkings must a carbon monoxi...COUT OF CONTEXTNaN
6565https://commission.europa.eu/document/download...Regulation4.1. Natural lightingWhat percentage is the daylight factor require...AOUT OF CONTEXTNaN
6666https://commission.europa.eu/document/download...Regulation1.2.1. Internal partitions and doorsWhat fire resistance must vertical partitions ...AOUT OF CONTEXTNaN
6767https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation5.2. Thread HierarchyWhat is the maximum number of threads within a...1024OUT OF CONTEXTNaN
6868https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation5.2. Thread HierarchyCan you identify a thread with a four-dimensio...NOOUT OF CONTEXTNaN
6969https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation6.1.1. Compilation WorkflowIn the offline compilation process using nvcc,...BOUT OF CONTEXTNaN
7070https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation6.1.1. Compilation WorkflowWhat are the two ways the host code can be out...BOUT OF CONTEXTNaN
7171https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation6.1.1. Compilation WorkflowWhat is the primary purpose of just-in-time (J...COUT OF CONTEXTNaN
7272https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation6.1.1. Compilation WorkflowWhat happens to the compiled binary code after...AOUT OF CONTEXTNaN
7373https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation15.3. API FundamentalsWhen are virtual addresses assigned to graph a...COUT OF CONTEXTNaN
7474https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation15.3. API FundamentalsWhat do graph memory nodes represent in a CUDA...AOUT OF CONTEXTNaN
7575https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation15.3. API FundamentalsWhen does a graph allocation's lifetime end? -...BOUT OF CONTEXTNaN
7676https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation15.3. API FundamentalsHow must operations accessing graph memory be ...COUT OF CONTEXTNaN
7777https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation23.1. What is Lazy Loading?What is the primary benefit of Lazy Loading? -...AOUT OF CONTEXTNaN
7878https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra...Techincal Documentation23.1. What is Lazy Loading?Can you enable lazy loading by setting the env...NOOUT OF CONTEXTNaN
7979https://aiindex.stanford.edu/wp-content/upload...Scientific ReportRisk Perceptionwhich type of risk was identified as the leadi...BOUT OF CONTEXTNaN
8080https://aiindex.stanford.edu/wp-content/upload...Scientific ReportRisk PerceptionIn which geographical area were fairness risks...COUT OF CONTEXTNaN
8181https://aiindex.stanford.edu/wp-content/upload...Scientific ReportTraining CostWhat is a major consequence of the rising trai...AOUT OF CONTEXTNaN
8282https://aiindex.stanford.edu/wp-content/upload...Scientific ReportTraining CostHow the AI Index and Epoch AI estimated traini...COUT OF CONTEXTNaN
8383https://aiindex.stanford.edu/wp-content/upload...Scientific ReportLLM Tokenization Introduces UnfairnessWhat is a major source of inequality in AI rel...AOUT OF CONTEXTNaN
8484https://aiindex.stanford.edu/wp-content/upload...Scientific ReportLLM Tokenization Introduces UnfairnessWhat are the three major inequalities resultin...BOUT OF CONTEXTNaN
8585https://aiindex.stanford.edu/wp-content/upload...Scientific ReportU.S. RegulationHow many AI-related regulations were enacted i...25OUT OF CONTEXTNaN
8686https://aiindex.stanford.edu/wp-content/upload...Scientific ReportU.S. RegulationWhich of the following was identified as a hig...BOUT OF CONTEXTNaN
8787https://aiindex.stanford.edu/wp-content/upload...Scientific ReportEuropeWhich country had the highest proportion of fe...BOUT OF CONTEXTNaN
8888https://aiindex.stanford.edu/wp-content/upload...Scientific ReportEuropeWhich countries reported the smallest proporti...COUT OF CONTEXTNaN
9090https://arxiv.org/pdf/2302.13971Scientific Report2.1 Pre-training DataHow many languages did the Wikipedia data cover?208NaN
9494https://arxiv.org/pdf/2302.13971Scientific Report3 Main resultsWas the model compared against GPT-4?NOI NEED MORE INFONaN
9999https://assets.publishing.service.gov.uk/media...RegulationLimitations of generative AI and LLMsCan LLMs be used as an alternative to visiting...NOI NEED MORE INFONaN
100100https://assets.publishing.service.gov.uk/media...RegulationProcurement in an emerging marketWhich of the following is NOT mentioned as a r...CI NEED MORE INFONaN
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"results\",\n \"rows\": 48,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24,\n \"min\": 10,\n \"max\": 100,\n \"num_unique_values\": 48,\n \"samples\": [\n 72,\n 85,\n 71\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"https://github.com/mozilla-ai/structured-qa/releases/download/0.3.2/is_eotn_rulebook.pdf\",\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2302.13971\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Scientific Report\",\n \"Regulation\",\n \"Techincal Documentation\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 29,\n \"samples\": [\n \"Limitations of generative AI and LLMs\",\n \"5.2. Thread Hierarchy\",\n \"2.1. Toilets\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 48,\n \"samples\": [\n \"What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\",\n \"How many AI-related regulations were enacted in the United States in 2023?\",\n \"What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 13,\n \"samples\": [\n \"25\",\n \"C\",\n \"0.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"NO\",\n \"5\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 14 - } + "e529d49f260c4dd095a9025e15c4cedf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_37fd0a08531d488abf6cf2c4efe77f91", + "IPY_MODEL_cc95fba624e244a4bf86d7f3c44da644", + "IPY_MODEL_78d7b4f191ff46088877d933a11da241" ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "for index, result in results.iterrows():\n", - " if result[\"pred_answer\"].startswith(\n", - " (f\"-{result['answer']}\", f\"{result['answer']}\")\n", - " ):\n", - " results.loc[index, \"pred_answer\"] = result[\"answer\"]\n", - "results.loc[results[\"answer\"] != results[\"pred_answer\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": 
"AhenESELHEgz", - "outputId": "e64b31e6-8ad5-4dfe-939b-5e6ace8abf5b" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0.5339805825242718" - ] - }, - "metadata": {}, - "execution_count": 15 - } - ], - "source": [ - "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", - "accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CD7lcTDjSM7T" - }, - "outputs": [], - "source": [] + "layout": "IPY_MODEL_77e5b0640b064226a9de17039489c493" + } } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.12" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "e529d49f260c4dd095a9025e15c4cedf": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_37fd0a08531d488abf6cf2c4efe77f91", - "IPY_MODEL_cc95fba624e244a4bf86d7f3c44da644", - "IPY_MODEL_78d7b4f191ff46088877d933a11da241" - ], - "layout": "IPY_MODEL_77e5b0640b064226a9de17039489c493" - } - }, - "37fd0a08531d488abf6cf2c4efe77f91": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", 
- "description_tooltip": null, - "layout": "IPY_MODEL_0cea8125daee456ca58658176f92e380", - "placeholder": "​", - "style": "IPY_MODEL_128fa55577b54565a824102717cf7365", - "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" - } - }, - "cc95fba624e244a4bf86d7f3c44da644": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a26ab1d05fe044d099e4a11ecc4d95e3", - "max": 8098525888, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_45f5c1e972db4eb2b1a163c85d8b3d6f", - "value": 8098525888 - } - }, - "78d7b4f191ff46088877d933a11da241": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d7a9754f17764fbba485fa9c1176145b", - "placeholder": "​", - "style": "IPY_MODEL_604381fc2e1d4ddda28291ddad36edc4", - "value": " 8.10G/8.10G [04:46<00:00, 42.4MB/s]" - } - }, - "77e5b0640b064226a9de17039489c493": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - 
"_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0cea8125daee456ca58658176f92e380": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": 
null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "128fa55577b54565a824102717cf7365": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a26ab1d05fe044d099e4a11ecc4d95e3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "45f5c1e972db4eb2b1a163c85d8b3d6f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - 
"model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "d7a9754f17764fbba485fa9c1176145b": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "604381fc2e1d4ddda28291ddad36edc4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": 
"1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/benchmark/qwen_2_5_7B_perfect_context.ipynb b/benchmark/qwen_2_5_7B_perfect_context.ipynb index fff97c5..175ac55 100644 --- a/benchmark/qwen_2_5_7B_perfect_context.ipynb +++ b/benchmark/qwen_2_5_7B_perfect_context.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -46,28 +46,14 @@ "id": "QrgOGtuGlyhT", "outputId": "15e40933-5457-4c0d-c16c-0b95852906e3" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Cloning into 'structured-qa'...\n", - "remote: Enumerating objects: 939, done.\u001b[K\n", - "remote: Counting objects: 100% (377/377), done.\u001b[K\n", - "remote: Compressing objects: 100% (224/224), done.\u001b[K\n", - "remote: Total 939 (delta 246), reused 220 (delta 140), pack-reused 562 (from 1)\u001b[K\n", - "Receiving objects: 100% (939/939), 2.56 MiB | 9.80 MiB/s, done.\n", - "Resolving deltas: 100% (528/528), done.\n" - ] - } - ], + "outputs": [], "source": [ - "!git clone --single-branch --branch 5-add-benchmark https://github.com/mozilla-ai/structured-qa" + "!git clone https://github.com/mozilla-ai/structured-qa" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -75,114 +61,14 @@ "id": "S22kTrfPlyhU", "outputId": "f6e41354-9abf-4cd8-c07b-40882a870523" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Processing ./structured-qa\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "Collecting fire (from structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading fire-0.7.0.tar.gz (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev113+g0ab4688) (0.27.1)\n", - "Collecting loguru (from structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev113+g0ab4688) (2.10.6)\n", - "Collecting pymupdf4llm (from structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading pymupdf4llm-0.0.17-py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from structured-qa==0.3.3.dev113+g0ab4688) (6.0.2)\n", - "Collecting rapidfuzz (from structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", - "Collecting streamlit (from structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->structured-qa==0.3.3.dev113+g0ab4688) (2.5.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in 
/usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev113+g0ab4688) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic->structured-qa==0.3.3.dev113+g0ab4688) (2.27.2)\n", - "Collecting pymupdf>=1.24.10 (from pymupdf4llm->structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)\n", - "Requirement already satisfied: altair<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (5.5.0)\n", - "Requirement already satisfied: blinker<2,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.9.0)\n", - "Requirement already satisfied: cachetools<6,>=4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (5.5.1)\n", - "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (8.1.8)\n", - "Requirement already satisfied: numpy<3,>=1.23 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.26.4)\n", - "Requirement already satisfied: 
pandas<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2.2.2)\n", - "Requirement already satisfied: pillow<12,>=7.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (11.1.0)\n", - "Requirement already satisfied: protobuf<6,>=3.20 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (4.25.6)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (17.0.0)\n", - "Requirement already satisfied: rich<14,>=10.14.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (13.9.4)\n", - "Requirement already satisfied: tenacity<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (9.0.0)\n", - "Requirement already satisfied: toml<2,>=0.10.1 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.10.2)\n", - "Collecting watchdog<7,>=2.1.5 (from streamlit->structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.1.44)\n", - "Collecting pydeck<1,>=0.8.0b4 (from streamlit->structured-qa==0.3.3.dev113+g0ab4688)\n", - " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: tornado<7,>=6.0.3 in /usr/local/lib/python3.11/dist-packages (from streamlit->structured-qa==0.3.3.dev113+g0ab4688) (6.4.2)\n", - "Requirement already satisfied: jinja2 in 
/usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.1.5)\n", - "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (4.23.0)\n", - "Requirement already satisfied: narwhals>=1.14.2 in /usr/local/lib/python3.11/dist-packages (from altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.24.1)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (4.0.12)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2025.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub->structured-qa==0.3.3.dev113+g0ab4688) (2024.12.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in 
/usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2.18.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (5.0.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (3.0.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (25.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.22.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit->structured-qa==0.3.3.dev113+g0ab4688) (1.17.0)\n", - "Downloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf4llm-0.0.17-py3-none-any.whl (26 kB)\n", - "Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m66.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m108.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m99.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m89.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.1/79.1 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: structured-qa, fire\n", - " Building wheel for structured-qa (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for structured-qa: filename=structured_qa-0.3.3.dev113+g0ab4688-py3-none-any.whl size=13247 sha256=331a7e775095111e06c2431f076e99cfda44a105ced2485b20b22643b4bce5eb\n", - " Stored in directory: /root/.cache/pip/wheels/b8/d1/8b/1585580e7787d68790745653775eb485d52a0d5386b616c827\n", - " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=131c84827cf2ab58b2046e5f9cb19cdb18596d75c3b40ca40d69579499f07b28\n", - " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", - "Successfully built structured-qa fire\n", - "Installing collected packages: watchdog, rapidfuzz, pymupdf, loguru, fire, pymupdf4llm, pydeck, streamlit, structured-qa\n", - "Successfully installed fire-0.7.0 loguru-0.7.3 pydeck-0.9.1 pymupdf-1.25.2 pymupdf4llm-0.0.17 rapidfuzz-3.12.1 streamlit-1.41.1 structured-qa-0.3.3.dev113+g0ab4688 watchdog-6.0.0\n" - ] - } - ], + "outputs": [], "source": [ - "%pip install ./structured-qa" + "%pip install --quiet ./structured-qa" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -190,17 +76,7 @@ "id": "mZtwFXA5IOvn", "outputId": "e0601b97-693b-4548-a41f-62ce35f8538b" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m445.2/445.2 MB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h" - ] - } - ], + "outputs": [], "source": [ "%pip install --quiet 
https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp311-cp311-linux_x86_64.whl" ] @@ -322,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -344,34 +220,7 @@ "id": "ObsvwlNslyhZ", "outputId": "50fbe86d-075b-46b5-ca45-5b24eabaf753" }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Qwen2.5-7B-Instruct-Q8_0.gguf: 0%| | 0.00/8.10G [00:00\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mLoading input data\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:42.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:42.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In billions, how many trainable parameters does GPT-3 have?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:44.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 175\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:44.763\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LoRA introduce additional inference latency compared to full fine-tuning?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:44.882\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:44.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:44.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What fire resistance must vertical partitions have? -A: EI30 -B: EI60 -C: EI90\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:45.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A: EI30\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:45.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:45.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: When are virtual addresses assigned to graph allocations? -A: At the moment the graph is executed in the GPU. -B: When the allocation is actually being used by the execution. 
-C: When the node is created.\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:45.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:46.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What do graph memory nodes represent in a CUDA graph? -A: Actions like allocating or freeing the memory. -B: Code that executes on the CPU to allocate memory. -C: The control flow and branching within a graph.\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:46.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:46.169\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: When does a graph allocation's lifetime end? -A: Only when the application shuts down. -B: When the execution reaches the freeing graph node, `cudaFreeAsync()`, or `cudaFree()`. -C: Only when the graph is destroyed.\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:46.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:46.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How must operations accessing graph memory be ordered within a graph? -A: Before the allocation node and after the freeing node. -B: After any previous GPU execution. 
-C: After the allocation node and before the freeing operation.\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:46.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:46.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:46.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What proportion of the pre-training data was from Github? -A: 4.5% -B: 15.0% -C: 4%\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:48.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A: 4.5%\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:48.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many languages did the Wikipedia data cover?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:48.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 20\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:48.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:48.537\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which type of water must be supplied in a toilet sink? 
-A: hot -B: cold -C: hot and cold\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:48.789\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:48.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:48.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:49.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:49.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What value was used for the weight decay?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:49.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 0.1\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:49.379\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:49.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can recurrent networks also be converted to decision trees?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:50.046\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:50.053\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:50.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the primary benefit of Lazy Loading? -A: It reduces memory overhead and saves initialization time. -B: It allows programs to load all kernels faster during initialization. -C: It makes CUDA programs easier to debug and optimize.\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:50.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:50.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you enable lazy loading by setting the env var `CUDA_MODULE_DATA_LOADING`?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:50.786\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:50.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:50.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is Arithmetic reasoning is a task that language models often find very easy?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:51.059\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:51.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:51.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers are in the toy model (y = x^2)?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:52.217\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:52.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the toy model (y = x^2) use Sigmoid activation function?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:52.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:52.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many parameters are in the toy model (y = x^2) tree?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:52.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 14\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:52.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers are in the half-moon neural 
network?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:52.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:52.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the main computational advantage of decision trees? -A: Less storage memory, -B: Fewer operations, -C: Lower accuracy\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:52.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:52.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:52.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many benchmarks were tested?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:53.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 20\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:53.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Was the model compared against GPT-4?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:53.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:53.529\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:53.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What type of architecture does the model use? -A: decoder only -B: encoder only -C: encoder-decoder\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:53.858\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:53.860\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:53.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers compose the encoder?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:54.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:54.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many layers compose the decoder?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:54.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 6\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:54.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:54.356\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many large language models were evaluated?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:55.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: Five\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:55.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many benchmarks were used to evaluate arithmetic reasoning?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:55.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:55.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:55.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many random samples were examined to understand model performance?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:56.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 100\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:56.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:56.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many parallel attention heads are used?\u001b[0m\n", - 
"\u001b[32m2025-02-04 18:01:56.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:56.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:56.550\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the final model use learned embeddings for the input and output tokens?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:56.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:56.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:56.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many annotators provided independent chains of thought?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:57.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:57.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:57.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does the final model use learned positional 
embeddings?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:57.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:57.753\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:57.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LoRA work with any neural network containing dense layers?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:57.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:57.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:57.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What percentage is the daylight factor required for façades with exterior obstructions? 
-A: 0.7% -B: 80% -C: 0.77%\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:58.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:58.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:58.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can LLMs re-produce biases that exist in training data?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:58.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:58.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do authors consider the evaluations enough to fully comprehend the risks of the model?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:58.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:58.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:58.638\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is symbolic reasoning usually simple for humans but challenging for language models?\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:59.328\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:59.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many words have the example names that the model has seen for letter concatenation? -A: 3 -B: 2 -C: 4\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:59.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:59.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which symbolic reasoning task is used as an out-of-domain evaluation? -A: Coin Flip -B: Tower of Hanoi -C: Chess puzzles\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:59.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:59.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:01:59.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does LLaMA compare worse than GPT-3 on average for the CrowS-Paris bias test?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:00.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:00.210\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:00.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many GPUs were used for training?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:00.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 8\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:00.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What type of GPUs were used for training? -A: NVIDIA A100 -B: NVIDIA P100 -C: NVIDIA T4\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:00.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:00.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:00.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the maximum number of threads within a thread block?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:01.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 1024\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:01.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you identify a thread with a four-dimensional 
index?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:01.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:01.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:01.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What optimizer was used for training? -A: AdamW -B: Adam -C: SGD\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:02.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:02.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many warmup steps were used?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:02.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 4000\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:02.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:02.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What was the dropout rate used for the base model?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:02.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - 
\u001b[1mAnswer: Pdrop = 0.1\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:02.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:02.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In the offline compilation process using nvcc, what happens to the device code? -A: It is directly executed on the host CPU. -B: It is transformed into assembly and/or binary form. -C: It is ignored and not used in the final application.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:03.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:03.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What are the two ways the host code can be output after being processed by nvcc? -A: As executable machine code, or a interpreted scripting language file. -B: As C++ code for later compilation, or as object code directly. -C: As an encrypted file or in a platform specific assembly format.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:03.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:03.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the primary purpose of just-in-time (JIT) compilation? -A: To convert host code into device code for execution on the GPU. -B: To optimize the performance of host code before it is compiled. 
-C: To compile PTX code into binary code at runtime by the device driver.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:03.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:03.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What happens to the compiled binary code after JIT compilation by the device driver? -A: It is cached for later use and to avoid recompilation. -B: It's directly interpreted and doesn't need to be cached. -C: It is deleted after use to save space.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:03.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:03.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:03.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you raid the locations of a player that has passed during the action phase?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:04.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:04.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many points in the scoreboard must be reached during the Action phase to trigger the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:04.485\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:04.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:04.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: what are the three key elements to consider when establishing accountable practices across the AI lifecycle? -A: Innovation, efficiency, and cost-effectiveness. -B: Answerability, auditability, and liability. -C: Speed, scalability, and security.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:05.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:05.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What does the text suggest end-users should do when using generative AI for tasks like drafting emails and reports? -A: Assume that the output is inherently accurate and truthful without additional checks. -B: Assume responsibility for the output and check that it is factually correct, non-discriminatory, and does not violate existing guidelines. 
-C: Rely on the provider's terms of use for all compliance and accuracy checks.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:05.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:05.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:05.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In which type of parkings must a carbon monoxide detector be installed? -A: indoor -B: underground -C: indoor or underground\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:05.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:05.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:05.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:06.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:06.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If a player is missing 2 skill symbols, how many coins must they pay to the 
reserve?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:06.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 2\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:06.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:06.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many different races are there?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:06.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 5\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:06.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you use a symbol more than once per turn?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:06.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:06.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which type of cards provide coins? 
-A: Gray -B: Yellow -C: Blue\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:06.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:07.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: During which chapter the purple cards become available? -A: Chapter 1 -B: Chapter 2 -C: Chapter 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:07.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:07.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:07.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Are Gold Open Access and Green Open Access mutually exclusive.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:07.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:07.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:07.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which player begins the game? 
-A: Sauron -B: The Fellowship -C: Other\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you take a Chapter card and a Landmark tile on your same turn?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.181\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many goins does a player take when discarding a card during Chapter 3?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: After taking a landmark tile, do you reveal a new tile and the end of your turn?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.398\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a cleanup phase in the final round?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:08.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If you place or move an unit and an enemy fortress is present, does it trigger a conflict?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:09.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:09.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:09.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:09.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:09.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In how many regions do you need to be present to win the game?\u001b[0m\n", - 
"\u001b[32m2025-02-04 18:02:09.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 7\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:09.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:09.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can players conquer and pillage the same island during the expedition phase?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:09.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:09.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a fish to conquer a distant island?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:10.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:10.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points you get from each conquered island?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:10.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 1\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:10.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - 
\u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:10.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which country had the highest proportion of female bachelor's graduates in informatics, computer science, computer engineering, and information technology among the surveyed European nations? -A: France. -B: Bulgaria. -C: United Kingdom\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:10.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:10.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which countries reported the smallest proportion of female master's graduates in informatics, CS, CE, and IT as of 2022? -A: Estonia, Romania, and Bulgaria. -B: United Kingdom, Germany, and Switzerland. 
-C: Belgium, Italy, and Switzerland.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:10.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:10.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:10.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can the game end in a tie?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:11.186\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:11.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: If player 1 has 30 Victory points and 4 workers and player 2 has 30 Victory points and 3 workers, who wins the game? -A: Player 1 -B: Player 2 -C: It's a tie\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:11.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:11.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:11.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: how many peer-reviewed open access journals are indexed by the Directory of Open Access Journals (DOAJ)? 
-A: Over 10,000 -B: Over 20,000 -C: Exactly 30,000\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:12.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:12.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:12.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is a major source of inequality in AI related to tokenization? -A: The significant variability in the number of tokens required to represent the same content across different languages. -B: The uniform processing speed of all languages. -C: The consistent cost of inference across different languages.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:12.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:12.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What are the three major inequalities resulting from variable tokenization? -A: Increased model training costs, limited access to resources, and biased results. -B: Higher inference costs, longer processing times, and reduced available context for the model. 
-C: Limited language support, increased hardware requirements, and data bias.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:12.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:12.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:12.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory points are granted by a built Field Location card that work as an upgrade?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:13.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: I need more info\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:13.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Do you need a ship to be on the expedition board to use a card that allows to pillage or conquer right away?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:13.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:13.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:13.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is the maximum number of cards a player may acquire during the lookout 
phase?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:14.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 4\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:14.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Is there a limit to the number of cards a player may have in their hand?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:14.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:14.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:14.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: By how much can LoRA reduce GPU memory requirements during training? -A: 10x, -B: 5x, -C: 3x\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:14.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:14.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:14.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which of the following is not considered a limitation of the Large Language Models? 
-A: Hallucination -B: Explainability -C: Memorization\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:15.465\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:15.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can LLMs be used as an alternative to visiting a doctor?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:15.584\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:15.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:15.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Are publication fees required for all open access journals?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:17.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:17.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:17.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many chapters does the game last?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:17.299\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:17.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many victory conditions are there?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:17.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 3\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:17.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:17.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which of the following is NOT mentioned as a relevant legal or regulatory provision regarding the use of AI technologies? 
-A: UK data protection law -B: The Online Safety Act -C: Digital, Data and Technology (DDaT) Playbook?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:17.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:17.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:17.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Can you use the raid action without a Raze token?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:18.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: NO\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:18.178\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:18.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: which type of risk was identified as the leading concern globally? -A: Fairness risks. -B: Privacy and data governance risks. 
-C: Risks related to generative AI deployment.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:18.633\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:18.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In which geographical area were fairness risks selected by the smallest percentage of respondents? -A: Asia. -B: Europe. -C: North America.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:18.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:18.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:18.752\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: According to the guide, what is the typical license used to grant reuse rights with libre open access? 
-A: GNU General Public License -B: Creative Commons license -C: MIT license\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:19.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:19.558\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Does open access eliminate price barriers?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:19.663\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: YES\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:19.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:19.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: What is a major consequence of the rising training costs for foundation models? -A: The exclusion of universities from developing leading-edge foundation models. -B: Increased collaboration between universities and AI companies. -C: A decrease in the number of policy initiatives related to AI research.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:20.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: A\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:20.266\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How the AI Index and Epoch AI estimated training costs for foundation models? -A: By surveying AI companies on their reported expenses. 
-B: By analyzing government funding allocated to AI research. -C: By analyzing training duration, hardware type, quantity, and utilization rate.\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:20.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: C\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:20.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:20.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: How many AI-related regulations were enacted in the United States in 2023?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:21.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 25\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:21.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: Which of the following was identified as a high relevance AI regulation? -A: Securities and Exchange Commission’s Cybersecurity Risk Management Strategy. -B: Copyright Office and Library of Congress’ Copyright Registration Guidance. 
-C: Regulations related to foreign trade and international finance\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:21.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: B\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:21.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mPredicting\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:21.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mQuestion: In what year did the Bill and Melinda Gates foundation implement an open access policy?\u001b[0m\n", - "\u001b[32m2025-02-04 18:02:21.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_section_questions\u001b[0m:\u001b[36m40\u001b[0m - \u001b[1mAnswer: 2015\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "from pathlib import Path\n", "\n", @@ -714,57 +292,11 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Unnamed: 0 document \\\n", - "10 10 https://arxiv.org/pdf/1706.03762 \n", - "28 28 https://arxiv.org/pdf/2201.11903 \n", - "32 32 https://arxiv.org/pdf/2201.11903 \n", - "37 37 https://github.com/mozilla-ai/structured-qa/re... \n", - "41 41 https://github.com/mozilla-ai/structured-qa/re... \n", - "42 42 https://github.com/mozilla-ai/structured-qa/re... \n", - "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", - "58 58 https://github.com/mozilla-ai/structured-qa/re... \n", - "68 68 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... 
\n", - "94 94 https://arxiv.org/pdf/2302.13971 \n", - "\n", - " type section \\\n", - "10 Scientific Paper 5.4 Regularization \n", - "28 Scientific Report 3.1 Experimental Setup \n", - "32 Scientific Report 5 Symbolic Reasoning \n", - "37 Board Game CARD AND TILE EFFECTS \n", - "41 Board Game CHAPTER OVERVIEW \n", - "42 Board Game CARD AND TILE COSTS \n", - "55 Board Game EXPEDITION PHASE \n", - "58 Board Game LOCATION ABILITIES \n", - "68 Techincal Documentation 5.2. Thread Hierarchy \n", - "94 Scientific Report 3 Main results \n", - "\n", - " question answer \\\n", - "10 What was the dropout rate used for the base mo... 0.1 \n", - "28 How many large language models were evaluated? 5 \n", - "32 Which symbolic reasoning task is used as an ou... A \n", - "37 How many different races are there? 6 \n", - "41 After taking a landmark tile, do you reveal a ... NO \n", - "42 Can a player pay coins to compensate for missi... YES \n", - "55 Do you need a fish to conquer a distant island? YES \n", - "58 How many victory points are granted by a built... 1 \n", - "68 Can you identify a thread with a four-dimensio... NO \n", - "94 Was the model compared against GPT-4? 
NO \n", - "\n", - " pred_answer pred_section \n", - "10 PDROP = 0.1 NaN \n", - "28 FIVE NaN \n", - "32 I NEED MORE INFO NaN \n", - "37 5 NaN \n", - "41 YES NaN \n", - "42 NO NaN \n", - "55 NO NaN \n", - "58 I NEED MORE INFO NaN \n", - "68 I NEED MORE INFO NaN \n", - "94 I NEED MORE INFO NaN " - ], + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"results\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 23,\n \"min\": 10,\n \"max\": 94,\n \"num_unique_values\": 10,\n \"samples\": [\n 68,\n 28,\n 42\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/2302.13971\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Scientific Report\",\n \"Techincal Documentation\",\n \"Scientific Paper\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"5.2. 
Thread Hierarchy\",\n \"3.1 Experimental Setup\",\n \"CARD AND TILE COSTS\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Can you identify a thread with a four-dimensional index?\",\n \"How many large language models were evaluated?\",\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"0.1\",\n \"5\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"PDROP = 0.1\",\n \"FIVE\",\n \"NO\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, "text/html": [ "\n", "
\n", @@ -1120,13 +652,59 @@ "
\n", " \n" ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"results\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 23,\n \"min\": 10,\n \"max\": 94,\n \"num_unique_values\": 10,\n \"samples\": [\n 68,\n 28,\n 42\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"https://arxiv.org/pdf/1706.03762\",\n \"https://arxiv.org/pdf/2201.11903\",\n \"https://arxiv.org/pdf/2302.13971\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Scientific Report\",\n \"Techincal Documentation\",\n \"Scientific Paper\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"5.2. 
Thread Hierarchy\",\n \"3.1 Experimental Setup\",\n \"CARD AND TILE COSTS\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Can you identify a thread with a four-dimensional index?\",\n \"How many large language models were evaluated?\",\n \"Can a player pay coins to compensate for missing skill symbols in a Landmark Tile?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"0.1\",\n \"5\",\n \"YES\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"PDROP = 0.1\",\n \"FIVE\",\n \"NO\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_section\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } + "text/plain": [ + " Unnamed: 0 document \\\n", + "10 10 https://arxiv.org/pdf/1706.03762 \n", + "28 28 https://arxiv.org/pdf/2201.11903 \n", + "32 32 https://arxiv.org/pdf/2201.11903 \n", + "37 37 https://github.com/mozilla-ai/structured-qa/re... \n", + "41 41 https://github.com/mozilla-ai/structured-qa/re... \n", + "42 42 https://github.com/mozilla-ai/structured-qa/re... \n", + "55 55 https://github.com/mozilla-ai/structured-qa/re... \n", + "58 58 https://github.com/mozilla-ai/structured-qa/re... \n", + "68 68 https://docs.nvidia.com/cuda/pdf/CUDA_C_Progra... 
\n", + "94 94 https://arxiv.org/pdf/2302.13971 \n", + "\n", + " type section \\\n", + "10 Scientific Paper 5.4 Regularization \n", + "28 Scientific Report 3.1 Experimental Setup \n", + "32 Scientific Report 5 Symbolic Reasoning \n", + "37 Board Game CARD AND TILE EFFECTS \n", + "41 Board Game CHAPTER OVERVIEW \n", + "42 Board Game CARD AND TILE COSTS \n", + "55 Board Game EXPEDITION PHASE \n", + "58 Board Game LOCATION ABILITIES \n", + "68 Techincal Documentation 5.2. Thread Hierarchy \n", + "94 Scientific Report 3 Main results \n", + "\n", + " question answer \\\n", + "10 What was the dropout rate used for the base mo... 0.1 \n", + "28 How many large language models were evaluated? 5 \n", + "32 Which symbolic reasoning task is used as an ou... A \n", + "37 How many different races are there? 6 \n", + "41 After taking a landmark tile, do you reveal a ... NO \n", + "42 Can a player pay coins to compensate for missi... YES \n", + "55 Do you need a fish to conquer a distant island? YES \n", + "58 How many victory points are granted by a built... 1 \n", + "68 Can you identify a thread with a four-dimensio... NO \n", + "94 Was the model compared against GPT-4? 
NO \n", + "\n", + " pred_answer pred_section \n", + "10 PDROP = 0.1 NaN \n", + "28 FIVE NaN \n", + "32 I NEED MORE INFO NaN \n", + "37 5 NaN \n", + "41 YES NaN \n", + "42 NO NaN \n", + "55 NO NaN \n", + "58 I NEED MORE INFO NaN \n", + "68 I NEED MORE INFO NaN \n", + "94 I NEED MORE INFO NaN " + ] }, + "execution_count": 10, "metadata": {}, - "execution_count": 10 + "output_type": "execute_result" } ], "source": [ @@ -1151,14 +729,14 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "0.9029126213592233" ] }, + "execution_count": 11, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" } ], "source": [ @@ -1183,32 +761,10 @@ }, "widgets": { "application/vnd.jupyter.widget-state+json": { - "edf16c851ed847a483d3cbc2022bc3aa": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_02809691f37345feb9027bb55c58c50b", - "IPY_MODEL_92a91f686ac44ca39056f1ee95448c64", - "IPY_MODEL_ebd54259bb264e72bc12dc33ececa0d3" - ], - "layout": "IPY_MODEL_44715f975fc441cfba941f061d728cfd" - } - }, "02809691f37345feb9027bb55c58c50b": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1226,55 +782,10 @@ "value": "Qwen2.5-7B-Instruct-Q8_0.gguf: 100%" } }, - "92a91f686ac44ca39056f1ee95448c64": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4dbddc95288342e38aac3f84a48eaf5d", - "max": 8098525888, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_57a78a6a8cd244c989bc1273c73a9f71", - "value": 8098525888 - } - }, - "ebd54259bb264e72bc12dc33ececa0d3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_33ba4f58a45644a0a3ade33cd0add51d", - "placeholder": "​", - "style": "IPY_MODEL_fc63523a46b44a63820335ec4976a246", - "value": " 8.10G/8.10G [03:12<00:00, 41.7MB/s]" - } - }, - "44715f975fc441cfba941f061d728cfd": { + "33ba4f58a45644a0a3ade33cd0add51d": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1323,10 +834,10 @@ "width": null } }, - "4f518be579c64f9a82bbd0db44c65492": { + "44715f975fc441cfba941f061d728cfd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1375,25 +886,10 @@ "width": null } }, - "7fbb4559c29547e19ade206b66e3f6c4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - 
"model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, "4dbddc95288342e38aac3f84a48eaf5d": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1442,26 +938,10 @@ "width": null } }, - "57a78a6a8cd244c989bc1273c73a9f71": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "33ba4f58a45644a0a3ade33cd0add51d": { + "4f518be579c64f9a82bbd0db44c65492": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1510,10 +990,108 @@ "width": null } }, - "fc63523a46b44a63820335ec4976a246": { + "57a78a6a8cd244c989bc1273c73a9f71": { "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"7fbb4559c29547e19ade206b66e3f6c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "92a91f686ac44ca39056f1ee95448c64": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4dbddc95288342e38aac3f84a48eaf5d", + "max": 8098525888, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_57a78a6a8cd244c989bc1273c73a9f71", + "value": 8098525888 + } + }, + "ebd54259bb264e72bc12dc33ececa0d3": { + "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_33ba4f58a45644a0a3ade33cd0add51d", + "placeholder": "​", + "style": "IPY_MODEL_fc63523a46b44a63820335ec4976a246", + "value": " 8.10G/8.10G [03:12<00:00, 41.7MB/s]" + } + }, + "edf16c851ed847a483d3cbc2022bc3aa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": 
"1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_02809691f37345feb9027bb55c58c50b", + "IPY_MODEL_92a91f686ac44ca39056f1ee95448c64", + "IPY_MODEL_ebd54259bb264e72bc12dc33ececa0d3" + ], + "layout": "IPY_MODEL_44715f975fc441cfba941f061d728cfd" + } + }, + "fc63523a46b44a63820335ec4976a246": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -1530,4 +1108,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From c99adb0e3bf343fd93a0a0023b82748adc0209bf Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 5 Feb 2025 16:36:52 +0100 Subject: [PATCH 118/120] Update pre-commit --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 41710a9..d52a76d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,13 +16,13 @@ repos: - id: sort-simple-yaml - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: 'v0.7.3' + rev: 'v0.9.4' hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - id: ruff-format - repo: https://github.com/codespell-project/codespell - rev: v2.3.0 + rev: v2.4.1 hooks: - id: codespell - exclude: CODE_OF_CONDUCT.md|benchmark/* + exclude: CODE_OF_CONDUCT.md|benchmark From a114fe5a5a79f22cb682600e79ae05db2712e732 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 5 Feb 2025 16:42:42 +0100 Subject: [PATCH 119/120] Update docstrings --- benchmark/gemini_RAGatouille.ipynb | 2 +- benchmark/qwen_2_5_7B_RAGatouille.ipynb | 9 
--------- src/structured_qa/preprocessing.py | 12 +----------- src/structured_qa/workflow.py | 6 ++++-- 4 files changed, 6 insertions(+), 23 deletions(-) diff --git a/benchmark/gemini_RAGatouille.ipynb b/benchmark/gemini_RAGatouille.ipynb index e6e2a90..8fabba2 100644 --- a/benchmark/gemini_RAGatouille.ipynb +++ b/benchmark/gemini_RAGatouille.ipynb @@ -115,7 +115,7 @@ }, "outputs": [], "source": [ - "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv" + "!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/main/benchmark/structured_qa.csv" ] }, { diff --git a/benchmark/qwen_2_5_7B_RAGatouille.ipynb b/benchmark/qwen_2_5_7B_RAGatouille.ipynb index 3cb2ce1..228c40b 100644 --- a/benchmark/qwen_2_5_7B_RAGatouille.ipynb +++ b/benchmark/qwen_2_5_7B_RAGatouille.ipynb @@ -1011,15 +1011,6 @@ "accuracy = sum(results[\"answer\"] == results[\"pred_answer\"]) / len(results)\n", "accuracy" ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "UXg_TC7R28QI" - }, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/src/structured_qa/preprocessing.py b/src/structured_qa/preprocessing.py index a9d9c0c..ed77bf0 100644 --- a/src/structured_qa/preprocessing.py +++ b/src/structured_qa/preprocessing.py @@ -17,17 +17,7 @@ def split_markdown_by_headings( heading_patterns (str, optional): A list of regex patterns representing heading markers in the markdown document. Defaults to None. - If None, the default patterns are used: - - ```python - [ - r"^#\s+(.+)$", - r"^##\s+(.+)$", - r"^###\s+(.+)$", - r"^\*\*[\d\.]+\.\*\*\s*\*\*(.+)\*\*$", - r"^\*\*[\d\.]+\.\*\*\s+(.+)$" - ] - ``` + If None, the default patterns are used. Returns: dict[str, str]: A dictionary where the keys are the section names and the values are the section contents. 
diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index 6a768b4..f1b7798 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -3,6 +3,8 @@ from loguru import logger from rapidfuzz import process +from structured_qa.model_loaders import LlamaModel + def get_matching_section(response, section_names): """ @@ -13,7 +15,7 @@ def get_matching_section(response, section_names): def find_retrieve_answer( question: str, - model, + model: LlamaModel, sections_dir: str, find_prompt: str, answer_prompt: str, @@ -24,7 +26,7 @@ def find_retrieve_answer( Args: question (str): The question to answer. - model: The model to use for generating completions. + model (LlamaModel): The model to use for generating completions. sections_dir (str): The directory containing the sections. See [`document_to_sections_dir`][structured_qa.preprocessing.document_to_sections_dir]. Structure of the sections directory: From eec44b0586bf3a2da6a4636fa37124305227cd51 Mon Sep 17 00:00:00 2001 From: daavoo Date: Wed, 5 Feb 2025 17:01:16 +0100 Subject: [PATCH 120/120] Update test --- benchmark/statistics.ipynb | 72 ----------------------------------- src/structured_qa/workflow.py | 2 +- tests/unit/test_workflow.py | 22 ++++++----- 3 files changed, 13 insertions(+), 83 deletions(-) delete mode 100644 benchmark/statistics.ipynb diff --git a/benchmark/statistics.ipynb b/benchmark/statistics.ipynb deleted file mode 100644 index e3652e8..0000000 --- a/benchmark/statistics.ipynb +++ /dev/null @@ -1,72 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "import plotly.express as px\n", - "from loguru import logger\n", - "\n", - "from structured_qa.preprocessing import document_to_sections_dir\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - 
"outputs": [], - "source": [ - "data = pd.read_csv(\"structured_qa.csv\")\n", - "\n", - "n_characters = []\n", - "n_sections = []\n", - "n_questions = []\n", - "for document_link, document_data in data.groupby(\"document\"):\n", - " downloaded_document = Path(f\"{Path(document_link).name}.pdf\")\n", - " if not Path(downloaded_document).exists():\n", - " urlretrieve(document_link, downloaded_document)\n", - " logger.info(f\"Downloaded {document_link} to {downloaded_document}\")\n", - " n_characters.append(len(downloaded_document.read_text().strip()))\n", - " sections = document_to_sections_dir(downloaded_document, \"/tmp/sections\")\n", - " n_sections.append(len(sections))\n", - " n_questions.append(len(document_data))\n", - " logger.info(f\"N sections: {len(sections)}\")\n", - " logger.info(f\"N questions: {len(document_data)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/structured_qa/workflow.py b/src/structured_qa/workflow.py index f1b7798..651df5f 100644 --- a/src/structured_qa/workflow.py +++ b/src/structured_qa/workflow.py @@ -62,7 +62,7 @@ def find_retrieve_answer( max_sections_to_check = min(20, len(sections_names)) sections_checked = [] - while len(sections_checked) < max_sections_to_check: + while len(sections_checked) <= max_sections_to_check: logger.debug(f"Current information available: {current_info}") if not current_info: logger.debug("Finding section") diff --git a/tests/unit/test_workflow.py b/tests/unit/test_workflow.py index 34b6c79..d87278d 
100644 --- a/tests/unit/test_workflow.py +++ b/tests/unit/test_workflow.py @@ -8,15 +8,15 @@ def test_find_retrieve_answer_multi_sections(tmp_path, mocker): def side_effect(messages): if FIND_PROMPT[:10] in messages[0]["content"]: if "section_1" in messages[0]["content"]: - return {"choices": [{"message": {"content": "section_1"}}]} + return "section_1" else: - return {"choices": [{"message": {"content": "section_2"}}]} + return "section_2" elif "Section 1" in messages[0]["content"]: - return {"choices": [{"message": {"content": "I need more info."}}]} + return "I need more info." elif "Section 2" in messages[0]["content"]: - return {"choices": [{"message": {"content": "Answer in Section 2"}}]} + return "Answer in Section 2" - model.create_chat_completion.side_effect = side_effect + model.get_response.side_effect = side_effect sections_dir = tmp_path / "sections" sections_dir.mkdir() @@ -36,14 +36,16 @@ def side_effect(messages): assert sections_checked == ["section_1", "section_2"] -def test_find_retrieve_answer_unkown_section(tmp_path, mocker): +def test_find_retrieve_answer_mispelled_section(tmp_path, mocker): model = mocker.MagicMock() def side_effect(messages): if FIND_PROMPT[:10] in messages[0]["content"]: - return {"choices": [{"message": {"content": "section_x"}}]} + return "SecTION 1" + else: + return "I need more info." - model.create_chat_completion.side_effect = side_effect + model.get_response.side_effect = side_effect sections_dir = tmp_path / "sections" sections_dir.mkdir() @@ -58,5 +60,5 @@ def side_effect(messages): answer_prompt=ANSWER_PROMPT, ) - assert answer is None - assert sections_checked == [] + assert answer == "NOT FOUND" + assert sections_checked == ["section_1"]