Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import logging
from typing import Optional, Tuple, List, Dict, Union, Any, Callable, Awaitable

from cognee.eval_framework.corpus_builder.task_getters.TaskGetters import TaskGetters
from cognee.eval_framework.benchmark_adapters.benchmark_adapters import BenchmarkAdapter
from cognee.modules.chunking.TextChunker import TextChunker
from cognee.modules.pipelines.tasks.Task import Task
Expand Down
2 changes: 0 additions & 2 deletions cognee/eval_framework/corpus_builder/run_corpus_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import json
from typing import List

from unstructured.chunking.dispatch import chunk

from cognee.infrastructure.files.storage import LocalStorage
from cognee.eval_framework.corpus_builder.corpus_builder_executor import CorpusBuilderExecutor
from cognee.modules.data.models.questions_base import QuestionsBase
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@
from cognee.eval_framework.corpus_builder.task_getters.get_cascade_graph_tasks import (
get_cascade_graph_tasks,
)
from cognee.eval_framework.corpus_builder.task_getters.get_default_tasks_by_indices import (
get_no_summary_tasks,
get_just_chunks_tasks,
)


class TaskGetters(Enum):
"""Enum mapping task getter types to their respective functions."""

DEFAULT = ("Default", get_default_tasks)
CASCADE_GRAPH = ("CascadeGraph", get_cascade_graph_tasks)
NO_SUMMARIES = ("NoSummaries", get_no_summary_tasks)
JUST_CHUNKS = ("JustChunks", get_just_chunks_tasks)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe OnlyChunks? It's just naming, so not that important.


def __new__(cls, getter_name: str, getter_func: Callable[..., Awaitable[List[Task]]]):
obj = object.__new__(cls)
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import List, Awaitable, Optional
from cognee.api.v1.cognify.cognify_v2 import get_default_tasks
from cognee.modules.pipelines.tasks.Task import Task
from cognee.modules.chunking.TextChunker import TextChunker


async def get_default_tasks_by_indices(
    indices: List[int], chunk_size: Optional[int] = None, chunker=TextChunker
) -> List[Task]:
    """Return the default tasks located at the given positions.

    Args:
        indices: Zero-based positions into the list returned by
            ``get_default_tasks``. The result follows the order given here.
        chunk_size: Forwarded unchanged to ``get_default_tasks``; ``None`` is
            passed through as-is.
        chunker: Forwarded unchanged to ``get_default_tasks``.

    Returns:
        The selected tasks, in the order of ``indices``.

    Raises:
        IndexError: If any index is negative or is not smaller than the number
            of default tasks.
    """
    all_tasks = await get_default_tasks(chunker=chunker, chunk_size=chunk_size)
    task_count = len(all_tasks)

    # Negative indices are rejected explicitly rather than wrapping around
    # to the end of the list as plain Python indexing would.
    if any(i < 0 or i >= task_count for i in indices):
        raise IndexError(
            f"Task indices {indices} out of range. Valid range: 0-{task_count - 1}"
        )

    return [all_tasks[i] for i in indices]


async def get_no_summary_tasks(
    chunk_size: Optional[int] = None, chunker=TextChunker
) -> List[Task]:
    """Return the default tasks with the summarization step left out.

    Args:
        chunk_size: Forwarded unchanged to ``get_default_tasks_by_indices``.
        chunker: Forwarded unchanged to ``get_default_tasks_by_indices``.

    Returns:
        The default tasks at positions 0, 1, 2, 3 and 5.
    """
    # Default tasks indices: 0=classify, 1=check_permissions, 2=extract_chunks, 3=extract_graph, 4=summarize, 5=add_data_points
    # NOTE(review): a collaborator flagged that these hard-coded indices must
    # be updated whenever the default pipeline changes. Accepted as fine for
    # now, but a nicer solution is wanted.
    return await get_default_tasks_by_indices(
        [0, 1, 2, 3, 5], chunk_size=chunk_size, chunker=chunker
    )


async def get_just_chunks_tasks(
    chunk_size: Optional[int] = None, chunker=TextChunker
) -> List[Task]:
    """Return the default tasks without graph extraction and summarization.

    Per the index legend below, this keeps classify, check_permissions,
    extract_chunks and add_data_points.

    Args:
        chunk_size: Forwarded unchanged to ``get_default_tasks_by_indices``.
        chunker: Forwarded unchanged to ``get_default_tasks_by_indices``.

    Returns:
        The default tasks at positions 0, 1, 2 and 5.
    """
    # Default tasks indices: 0=classify, 1=check_permissions, 2=extract_chunks, 3=extract_graph, 4=summarize, 5=add_data_points
    return await get_default_tasks_by_indices(
        [0, 1, 2, 5], chunk_size=chunk_size, chunker=chunker
    )
4 changes: 3 additions & 1 deletion cognee/eval_framework/eval_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ class EvalConfig(BaseSettings):
building_corpus_from_scratch: bool = True
number_of_samples_in_corpus: int = 1
benchmark: str = "Dummy" # Options: 'HotPotQA', 'Dummy', 'TwoWikiMultiHop'
task_getter_type: str = "Default" # Options: 'Default', 'CascadeGraph'
task_getter_type: str = (
"Default" # Options: 'Default', 'CascadeGraph', 'NoSummaries', 'JustChunks'
)

# Question answering params
answering_questions: bool = True
Expand Down
Loading