25 changes: 10 additions & 15 deletions cognee/api/v1/cognify/cognify.py
@@ -19,7 +19,6 @@
 from cognee.modules.users.models import User
 
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -78,12 +77,11 @@ async def cognify(
 
     Processing Pipeline:
     1. **Document Classification**: Identifies document types and structures
-    2. **Permission Validation**: Ensures user has processing rights
-    3. **Text Chunking**: Breaks content into semantically meaningful segments
-    4. **Entity Extraction**: Identifies key concepts, people, places, organizations
-    5. **Relationship Detection**: Discovers connections between entities
-    6. **Graph Construction**: Builds semantic knowledge graph with embeddings
-    7. **Content Summarization**: Creates hierarchical summaries for navigation
+    2. **Text Chunking**: Breaks content into semantically meaningful segments
+    3. **Entity Extraction**: Identifies key concepts, people, places, organizations
+    4. **Relationship Detection**: Discovers connections between entities
+    5. **Graph Construction**: Builds semantic knowledge graph with embeddings
+    6. **Content Summarization**: Creates hierarchical summaries for navigation
 
     Graph Model Customization:
     The `graph_model` parameter allows custom knowledge structures:
@@ -274,7 +272,6 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
 
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
@@ -305,14 +302,13 @@ async def get_temporal_tasks(
 
     The pipeline includes:
     1. Document classification.
-    2. Dataset permission checks (requires "write" access).
-    3. Document chunking with a specified or default chunk size.
-    4. Event and timestamp extraction from chunks.
-    5. Knowledge graph extraction from events.
-    6. Batched insertion of data points.
+    2. Document chunking with a specified or default chunk size.
+    3. Event and timestamp extraction from chunks.
+    4. Knowledge graph extraction from events.
+    5. Batched insertion of data points.
 
     Args:
-        user (User, optional): The user requesting task execution, used for permission checks.
+        user (User, optional): The user requesting task execution.
[Review comment, Contributor, on the line above] Let's remove user argument if we don't need it.
         chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
         chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
         chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@@ -325,7 +321,6 @@ async def get_temporal_tasks(
 
     temporal_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
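Taken together, the cognify.py hunks leave a default pipeline that starts directly at classification and chunking. Below is a minimal sketch of the head of the trimmed task list, using only names that appear in this diff; the import paths and the wrapper function are assumptions for illustration, and the real get_default_tasks continues with entity extraction, graph construction, and summarization tasks.

# Sketch only: head of the trimmed default pipeline after this change.
# Import paths are assumed; the diff shows these symbols but not their imports.
from cognee.modules.pipelines.tasks.task import Task  # assumed path
from cognee.infrastructure.llm.utils import get_max_chunk_tokens  # assumed path
from cognee.tasks.documents import classify_documents, extract_chunks_from_documents

async def get_default_tasks_sketch(chunk_size=None):
    return [
        Task(classify_documents),  # step 1: document classification
        Task(
            extract_chunks_from_documents,
            max_chunk_size=chunk_size or get_max_chunk_tokens(),  # system default when unset
        ),  # step 2: text chunking
        # ...entity extraction, graph construction, and summarization follow
    ]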
@@ -8,7 +8,6 @@
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.shared.utils import send_telemetry
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -31,7 +30,6 @@ async def get_cascade_graph_tasks(
     cognee_config = get_cognify_config()
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
         ),  # Extract text chunks based on the document type.
@@ -30,8 +30,8 @@ async def get_no_summary_tasks(
     ontology_file_path=None,
 ) -> List[Task]:
     """Returns default tasks without summarization tasks."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
 
     ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
 
@@ -51,8 +51,8 @@ async def get_just_chunks_tasks(
     chunk_size: int = None, chunker=TextChunker, user=None
 ) -> List[Task]:
     """Returns default tasks with only chunk extraction and data points addition."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
 
     add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})
 
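The subtle point in these two hunks is the index shift: with the permission task gone from the base list, extract_chunks moves from index 2 to index 1, so every caller of `get_default_tasks_by_indices` has to change in lockstep. Condensed, the change is:

# Before this PR: base tasks were (0=classify, 1=check_permissions, 2=extract_chunks)
base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)

# After this PR: base tasks are (0=classify, 1=extract_chunks)
base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)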
1 change: 0 additions & 1 deletion cognee/tasks/documents/__init__.py
@@ -1,3 +1,2 @@
 from .classify_documents import classify_documents
 from .extract_chunks_from_documents import extract_chunks_from_documents
-from .check_permissions_on_dataset import check_permissions_on_dataset
26 changes: 0 additions & 26 deletions cognee/tasks/documents/check_permissions_on_dataset.py

This file was deleted.

11 changes: 4 additions & 7 deletions examples/python/simple_example.py
@@ -32,16 +32,13 @@ async def main():
print("Cognify process steps:")
print("1. Classifying the document: Determining the type and category of the input text.")
print(
"2. Checking permissions: Ensuring the user has the necessary rights to process the text."
"2. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
)
print(
"3. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
"3. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
)
print("4. Adding data points: Storing the extracted chunks for processing.")
print(
"5. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
)
print("6. Summarizing text: Creating concise summaries of the content for quick insights.\n")
print("4. Summarizing text: Creating concise summaries of the content for quick insights.")
print("5. Adding data points: Storing the extracted chunks for processing.\n")

# Use LLMs and cognee to create knowledge graph
await cognee.cognify()
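For context, the example now narrates a five-step pipeline before kicking it off. A condensed sketch of the surrounding flow follows; the `text` variable and the `cognee.add` call live in the unchanged part of simple_example.py, so their exact form here is an assumption.

import asyncio
import cognee

async def main():
    # Assumed shape of the unchanged setup in simple_example.py:
    text = "Cognee turns documents into a queryable knowledge graph."
    await cognee.add(text)  # assumed: ingest the text before processing

    # Shown in this diff: run the trimmed cognify pipeline.
    await cognee.cognify()

asyncio.run(main())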
4 changes: 1 addition & 3 deletions notebooks/cognee_demo.ipynb
@@ -591,7 +591,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
  "id": "7c431fdef4921ae0",
  "metadata": {
   "ExecuteTime": {
@@ -609,7 +608,6 @@
 "from cognee.modules.pipelines import run_tasks\n",
 "from cognee.modules.users.models import User\n",
 "from cognee.tasks.documents import (\n",
-"    check_permissions_on_dataset,\n",
 "    classify_documents,\n",
 "    extract_chunks_from_documents,\n",
 ")\n",
@@ -627,7 +626,6 @@
 "\n",
 "    tasks = [\n",
 "        Task(classify_documents),\n",
-"        Task(check_permissions_on_dataset, user=user, permissions=[\"write\"]),\n",
 "        Task(\n",
 "            extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()\n",
 "        ),  # Extract text chunks based on the document type.\n",