diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py
index 0fa345176d..4efec365a8 100644
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@@ -19,7 +19,6 @@
 from cognee.modules.users.models import User
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -78,12 +77,11 @@ async def cognify(
 
     Processing Pipeline:
         1. **Document Classification**: Identifies document types and structures
-        2. **Permission Validation**: Ensures user has processing rights
-        3. **Text Chunking**: Breaks content into semantically meaningful segments
-        4. **Entity Extraction**: Identifies key concepts, people, places, organizations
-        5. **Relationship Detection**: Discovers connections between entities
-        6. **Graph Construction**: Builds semantic knowledge graph with embeddings
-        7. **Content Summarization**: Creates hierarchical summaries for navigation
+        2. **Text Chunking**: Breaks content into semantically meaningful segments
+        3. **Entity Extraction**: Identifies key concepts, people, places, organizations
+        4. **Relationship Detection**: Discovers connections between entities
+        5. **Graph Construction**: Builds semantic knowledge graph with embeddings
+        6. **Content Summarization**: Creates hierarchical summaries for navigation
 
     Graph Model Customization:
         The `graph_model` parameter allows custom knowledge structures:
@@ -274,7 +272,6 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
 
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
@@ -305,14 +302,13 @@ async def get_temporal_tasks(
 
     The pipeline includes:
         1. Document classification.
-        2. Dataset permission checks (requires "write" access).
-        3. Document chunking with a specified or default chunk size.
-        4. Event and timestamp extraction from chunks.
-        5. Knowledge graph extraction from events.
-        6. Batched insertion of data points.
+        2. Document chunking with a specified or default chunk size.
+        3. Event and timestamp extraction from chunks.
+        4. Knowledge graph extraction from events.
+        5. Batched insertion of data points.
 
     Args:
-        user (User, optional): The user requesting task execution, used for permission checks.
+        user (User, optional): The user requesting task execution.
         chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
         chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
         chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@@ -325,7 +321,6 @@ async def get_temporal_tasks(
 
     temporal_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
diff --git a/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py b/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py
index edac150157..1fbc31c02f 100644
--- a/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py
+++ b/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py
@@ -8,7 +8,6 @@
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.shared.utils import send_telemetry
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -31,7 +30,6 @@ async def get_cascade_graph_tasks(
     cognee_config = get_cognify_config()
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
         ),  # Extract text chunks based on the document type.
diff --git a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
index fb10c7eed6..6a39a67cf7 100644
--- a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
+++ b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
@@ -30,8 +30,8 @@ async def get_no_summary_tasks(
     ontology_file_path=None,
 ) -> List[Task]:
     """Returns default tasks without summarization tasks."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
 
     ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
 
@@ -51,8 +51,8 @@ async def get_just_chunks_tasks(
     chunk_size: int = None, chunker=TextChunker, user=None
 ) -> List[Task]:
     """Returns default tasks with only chunk extraction and data points addition."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
 
     add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})
 
diff --git a/cognee/tasks/documents/__init__.py b/cognee/tasks/documents/__init__.py
index f4582fbe0c..043625f357 100644
--- a/cognee/tasks/documents/__init__.py
+++ b/cognee/tasks/documents/__init__.py
@@ -1,3 +1,2 @@
 from .classify_documents import classify_documents
 from .extract_chunks_from_documents import extract_chunks_from_documents
-from .check_permissions_on_dataset import check_permissions_on_dataset
diff --git a/cognee/tasks/documents/check_permissions_on_dataset.py b/cognee/tasks/documents/check_permissions_on_dataset.py
deleted file mode 100644
index 01a03de5f9..0000000000
--- a/cognee/tasks/documents/check_permissions_on_dataset.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from cognee.modules.data.processing.document_types import Document
-from cognee.modules.users.permissions.methods import check_permission_on_dataset
-from typing import List
-
-
-async def check_permissions_on_dataset(
-    documents: List[Document], context: dict, user, permissions
-) -> List[Document]:
-    """
-    Validates a user's permissions on a list of documents.
-
-    Notes:
-    - This function assumes that `check_permission_on_documents` raises an exception if the permission check fails.
-    - It is designed to validate multiple permissions in a sequential manner for the same set of documents.
-    - Ensure that the `Document` and `user` objects conform to the expected structure and interfaces.
-    """
-
-    for permission in permissions:
-        await check_permission_on_dataset(
-            user,
-            permission,
-            # TODO: pass dataset through argument instead of context
-            context["dataset"].id,
-        )
-
-    return documents
diff --git a/examples/python/simple_example.py b/examples/python/simple_example.py
index c13e48f853..347ace3658 100644
--- a/examples/python/simple_example.py
+++ b/examples/python/simple_example.py
@@ -32,16 +32,13 @@ async def main():
     print("Cognify process steps:")
     print("1. Classifying the document: Determining the type and category of the input text.")
     print(
-        "2. Checking permissions: Ensuring the user has the necessary rights to process the text."
+        "2. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
     )
     print(
-        "3. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
+        "3. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
     )
-    print("4. Adding data points: Storing the extracted chunks for processing.")
-    print(
-        "5. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
-    )
-    print("6. Summarizing text: Creating concise summaries of the content for quick insights.\n")
+    print("4. Summarizing text: Creating concise summaries of the content for quick insights.")
+    print("5. Adding data points: Storing the extracted chunks for processing.\n")
 
     # Use LLMs and cognee to create knowledge graph
     await cognee.cognify()
diff --git a/notebooks/cognee_demo.ipynb b/notebooks/cognee_demo.ipynb
index 09c4c89bed..fe6ae50ae4 100644
--- a/notebooks/cognee_demo.ipynb
+++ b/notebooks/cognee_demo.ipynb
@@ -591,7 +591,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "id": "7c431fdef4921ae0",
    "metadata": {
     "ExecuteTime": {
@@ -609,7 +609,6 @@
    "from cognee.modules.pipelines import run_tasks\n",
    "from cognee.modules.users.models import User\n",
    "from cognee.tasks.documents import (\n",
-    "    check_permissions_on_dataset,\n",
    "    classify_documents,\n",
    "    extract_chunks_from_documents,\n",
    ")\n",
@@ -627,7 +626,6 @@
    "\n",
    "    tasks = [\n",
    "        Task(classify_documents),\n",
-    "        Task(check_permissions_on_dataset, user=user, permissions=[\"write\"]),\n",
    "        Task(\n",
    "            extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()\n",
    "        ),  # Extract text chunks based on the document type.\n",
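
Note on usage after this change: the task list no longer gates on dataset
permissions, so a caller that still wants a "write" check can run it before
kicking off cognify. A minimal sketch, not part of this patch, assuming
check_permission_on_dataset keeps the (user, permission, dataset_id) call
shape shown in the deleted module and raises when the check fails:

    from cognee.modules.users.permissions.methods import check_permission_on_dataset

    async def ensure_write_access(user, dataset) -> None:
        # Hypothetical pre-flight helper: raises if `user` lacks "write" on
        # `dataset`; otherwise returns and the cognify pipeline can proceed.
        await check_permission_on_dataset(user, "write", dataset.id)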