Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/workflows/e2e_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,31 @@ jobs:
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./cognee/tests/test_dataset_database_handler.py

test-dataset-database-deletion:
name: Test dataset database deletion in Cognee
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4

- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'

- name: Run dataset databases deletion test
env:
ENV: 'dev'
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./cognee/tests/test_dataset_delete.py

test-permissions:
name: Test permissions with different situations in Cognee
runs-on: ubuntu-22.04
Expand Down
2 changes: 1 addition & 1 deletion cognee/api/v1/cognify/cognify.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ async def cognify(
custom_prompt: Optional[str] = None,
temporal_cognify: bool = False,
data_per_batch: int = 20,
**kwargs
**kwargs,
):
"""
Transform ingested data into a structured knowledge graph.
Expand Down
6 changes: 3 additions & 3 deletions cognee/api/v1/datasets/routers/get_datasets_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,14 +208,14 @@ async def delete_dataset(dataset_id: UUID, user: User = Depends(get_authenticate
},
)

from cognee.modules.data.methods import get_dataset, delete_dataset
from cognee.modules.data.methods import delete_dataset

dataset = await get_dataset(user.id, dataset_id)
dataset = await get_authorized_existing_datasets([dataset_id], "delete", user)

if dataset is None:
raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")

await delete_dataset(dataset)
await delete_dataset(dataset[0])

@router.delete(
"/{dataset_id}/data/{data_id}",
Expand Down
2 changes: 2 additions & 0 deletions cognee/infrastructure/databases/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
from .get_or_create_dataset_database import get_or_create_dataset_database
from .resolve_dataset_database_connection_info import resolve_dataset_database_connection_info
from .get_graph_dataset_database_handler import get_graph_dataset_database_handler
from .get_vector_dataset_database_handler import get_vector_dataset_database_handler
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from cognee.modules.users.models.DatasetDatabase import DatasetDatabase


def get_graph_dataset_database_handler(dataset_database: DatasetDatabase) -> dict:
    """Return the registry entry for the dataset's graph database handler.

    Looks up ``dataset_database.graph_dataset_database_handler`` in the
    supported handler registry. The import is deferred into the function
    body (presumably to avoid a circular import — confirm against the
    registry module).

    Raises:
        KeyError: if the named handler is not registered.
    """
    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
        supported_dataset_database_handlers,
    )

    return supported_dataset_database_handlers[dataset_database.graph_dataset_database_handler]
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from cognee.modules.users.models.DatasetDatabase import DatasetDatabase


def get_vector_dataset_database_handler(dataset_database: DatasetDatabase) -> dict:
    """Return the registry entry for the dataset's vector database handler.

    Looks up ``dataset_database.vector_dataset_database_handler`` in the
    supported handler registry. The import is deferred into the function
    body (presumably to avoid a circular import — confirm against the
    registry module).

    Raises:
        KeyError: if the named handler is not registered.
    """
    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
        supported_dataset_database_handlers,
    )

    return supported_dataset_database_handlers[dataset_database.vector_dataset_database_handler]
Original file line number Diff line number Diff line change
@@ -1,24 +1,12 @@
from cognee.infrastructure.databases.utils.get_graph_dataset_database_handler import (
get_graph_dataset_database_handler,
)
from cognee.infrastructure.databases.utils.get_vector_dataset_database_handler import (
get_vector_dataset_database_handler,
)
from cognee.modules.users.models.DatasetDatabase import DatasetDatabase


async def _get_vector_db_connection_info(dataset_database: DatasetDatabase) -> DatasetDatabase:
from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
supported_dataset_database_handlers,
)

handler = supported_dataset_database_handlers[dataset_database.vector_dataset_database_handler]
return await handler["handler_instance"].resolve_dataset_connection_info(dataset_database)


async def _get_graph_db_connection_info(dataset_database: DatasetDatabase) -> DatasetDatabase:
from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
supported_dataset_database_handlers,
)

handler = supported_dataset_database_handlers[dataset_database.graph_dataset_database_handler]
return await handler["handler_instance"].resolve_dataset_connection_info(dataset_database)


async def resolve_dataset_database_connection_info(
dataset_database: DatasetDatabase,
) -> DatasetDatabase:
Expand All @@ -31,6 +19,12 @@ async def resolve_dataset_database_connection_info(
Returns:
DatasetDatabase instance with resolved connection info
"""
dataset_database = await _get_vector_db_connection_info(dataset_database)
dataset_database = await _get_graph_db_connection_info(dataset_database)
vector_dataset_database_handler = get_vector_dataset_database_handler(dataset_database)
graph_dataset_database_handler = get_graph_dataset_database_handler(dataset_database)
dataset_database = await vector_dataset_database_handler[
"handler_instance"
].resolve_dataset_connection_info(dataset_database)
dataset_database = await graph_dataset_database_handler[
"handler_instance"
].resolve_dataset_connection_info(dataset_database)
return dataset_database
5 changes: 4 additions & 1 deletion cognee/infrastructure/llm/LLMGateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ def acreate_structured_output(

llm_client = get_llm_client()
return llm_client.acreate_structured_output(
text_input=text_input, system_prompt=system_prompt, response_model=response_model, **kwargs
text_input=text_input,
system_prompt=system_prompt,
response_model=response_model,
**kwargs,
)

@staticmethod
Expand Down
38 changes: 12 additions & 26 deletions cognee/modules/data/deletion/prune_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.infrastructure.databases.utils import (
get_graph_dataset_database_handler,
get_vector_dataset_database_handler,
)
from cognee.shared.cache import delete_cache
from cognee.modules.users.models import DatasetDatabase
from cognee.shared.logging_utils import get_logger
Expand All @@ -13,22 +17,13 @@


async def prune_graph_databases():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add docstring to document function purpose.

Per coding guidelines, undocumented function definitions are considered incomplete. This function should have a docstring explaining that it deletes all graph databases for all dataset_database records when backend access control is enabled.

Apply this diff:

 async def prune_graph_databases():
+    """
+    Delete all graph databases for all dataset_database records.
+    Used when backend_access_control is enabled to prune graph databases per-dataset.
+    """
     db_engine = get_relational_engine()

As per coding guidelines, undocumented functions are assumed incomplete.

🤖 Prompt for AI Agents
In cognee/modules/data/deletion/prune_system.py around line 19, the function
prune_graph_databases lacks a docstring; add a concise docstring immediately
below the async def that states the function's purpose: that it deletes all
graph databases for all dataset_database records when backend access control is
enabled, describes input (none) and behavior (side-effect deletion) and notes
any exceptions or important preconditions (e.g., requires backend access control
enabled). Ensure the docstring follows project style (single- or triple-quoted
summary) and is brief and descriptive.

async def _prune_graph_db(dataset_database: DatasetDatabase) -> dict:
from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
supported_dataset_database_handlers,
)

handler = supported_dataset_database_handlers[
dataset_database.graph_dataset_database_handler
]
return await handler["handler_instance"].delete_dataset(dataset_database)

db_engine = get_relational_engine()
try:
data = await db_engine.get_all_data_from_table("dataset_database")
dataset_databases = await db_engine.get_all_data_from_table("dataset_database")
# Go through each dataset database and delete the graph database
for data_item in data:
await _prune_graph_db(data_item)
for dataset_database in dataset_databases:
handler = get_graph_dataset_database_handler(dataset_database)
await handler["handler_instance"].delete_dataset(dataset_database)
Comment on lines +22 to +26
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

cat -n cognee/modules/data/deletion/prune_system.py

Repository: topoteretes/cognee

Length of output: 3544


🏁 Script executed:

# Find the db_engine.get_all_data_from_table implementation
find . -type f -name "*.py" | xargs grep -l "def get_all_data_from_table" | head -5

Repository: topoteretes/cognee

Length of output: 138


🏁 Script executed:

# Find get_graph_dataset_database_handler function
rg -n "def get_graph_dataset_database_handler" --type=py

Repository: topoteretes/cognee

Length of output: 222


🏁 Script executed:

# Find DatasetDatabase class definition
rg -n "class DatasetDatabase" --type=py

Repository: topoteretes/cognee

Length of output: 280


🏁 Script executed:

cat -n cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py | grep -A 20 "def get_all_data_from_table"

Repository: topoteretes/cognee

Length of output: 1057


🏁 Script executed:

cat -n cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py

Repository: topoteretes/cognee

Length of output: 569


🏁 Script executed:

cat -n cognee/modules/users/models/DatasetDatabase.py

Repository: topoteretes/cognee

Length of output: 2140


🏁 Script executed:

cat -n cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py | sed -n '417,460p'

Repository: topoteretes/cognee

Length of output: 1885


🏁 Script executed:

# Search for complete implementation of get_all_data_from_table
rg -A 30 "async def get_all_data_from_table" cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py

Repository: topoteretes/cognee

Length of output: 1234


🏁 Script executed:

# Check how get_all_data_from_table is used elsewhere in the codebase to see if there are other examples
rg -B 2 -A 5 "get_all_data_from_table\(" --type=py | head -50

Repository: topoteretes/cognee

Length of output: 3949


🏁 Script executed:

# Check SQLAlchemy Row behavior - does mappings() support attribute access?
python3 << 'EOF'
# Check SQLAlchemy documentation for Row behavior
import sys
try:
    from sqlalchemy import Row
    # Check if Row supports attribute access
    print("SQLAlchemy Row class:", Row)
    print("Row supports attribute access via __getattr__")
    help(Row)
except Exception as e:
    print(f"Error: {e}")
EOF

Repository: topoteretes/cognee

Length of output: 97


🏁 Script executed:

# Let's check if there are any similar patterns in the codebase where Row mappings are used with attribute access
rg -B 3 -A 3 "\.mappings\(\)" --type=py cognee/infrastructure/databases/relational/sqlalchemy/

Repository: topoteretes/cognee

Length of output: 802


🏁 Script executed:

# Check if DatasetDatabase is instantiated directly elsewhere or if Row objects are used directly
rg -B 2 -A 2 "DatasetDatabase\(" --type=py cognee/ | grep -v test | head -40

Repository: topoteretes/cognee

Length of output: 942


🏁 Script executed:

# Check SQLAlchemy version in requirements or setup files
find . -name "requirements*.txt" -o -name "pyproject.toml" -o -name "setup.py" | xargs grep -i sqlalchemy

Repository: topoteretes/cognee

Length of output: 230


🏁 Script executed:

# Check if there are other usages where get_all_data_from_table results are used with attribute access
rg -B 2 -A 5 "get_all_data_from_table" --type=py cognee/ | grep -A 5 "for.*in.*get_all_data_from_table"

Repository: topoteretes/cognee

Length of output: 44


🏁 Script executed:

# Look for prune_vector_databases which has same pattern as prune_graph_databases
cat -n cognee/modules/data/deletion/prune_system.py | sed -n '35,48p'

Repository: topoteretes/cognee

Length of output: 825


🏁 Script executed:

# Check SQLAlchemy Row attribute access support in 2.0+
# Search for any existing uses of Row attribute access in the codebase
rg "result\[" cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py | head -10

Repository: topoteretes/cognee

Length of output: 140


🏁 Script executed:

# Look for any test that verifies the attribute access on Row objects from get_all_data_from_table
rg -B 2 -A 2 "result\[.*\]" cognee/tests/test_deduplication.py | head -20

Repository: topoteretes/cognee

Length of output: 1194


🏁 Script executed:

# Check if get_vector_dataset_database_handler also uses attribute access like get_graph_dataset_database_handler
cat -n cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py

Repository: topoteretes/cognee

Length of output: 571


🏁 Script executed:

# Check if there's a typing issue - look at the actual parameter types expected by the handlers
rg -B 5 -A 10 "def get_graph_dataset_database_handler" cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py

Repository: topoteretes/cognee

Length of output: 499


🏁 Script executed:

# Check if Row objects from SQLAlchemy 2.0 are being used with attribute access anywhere in the codebase
rg "for .* in .*get_all_data_from_table" -A 3 --type=py cognee/ | grep -v test

Repository: topoteretes/cognee

Length of output: 44


🏁 Script executed:

# Check if there are any Row to Model conversions happening anywhere
rg "Row\|mappings" --type=py cognee/infrastructure/databases/relational/sqlalchemy/

Repository: topoteretes/cognee

Length of output: 44


🏁 Script executed:

# Let me verify SQLAlchemy 2.0 Row behavior by checking pyproject.toml more carefully
cat -n pyproject.toml | grep -A 5 -B 5 "sqlalchemy"

Repository: topoteretes/cognee

Length of output: 1354


🏁 Script executed:

# Check if Row objects truly support both dict and attribute access in actual usage
# Look for any error handling or type: ignore comments related to this
rg "type: ignore|# noqa" cognee/modules/data/deletion/prune_system.py

Repository: topoteretes/cognee

Length of output: 44


🏁 Script executed:

# Search for any other functions that receive Row objects and use attribute access
rg -B 3 "\.graph_dataset_database_handler|\.vector_dataset_database_handler" --type=py cognee/

Repository: topoteretes/cognee

Length of output: 3574


🏁 Script executed:

# Verify SQLAlchemy 2.0+ Row attribute access support by checking actual Row class behavior
# Look for any issues or type annotations in the codebase related to this
rg "RoomRowMapping|LegacyRow" --type=py cognee/

Repository: topoteretes/cognee

Length of output: 44


🏁 Script executed:

# Check if there are type checking issues - look for mypy or pyright configurations
cat pyproject.toml | grep -A 10 "\[tool.mypy\]"

Repository: topoteretes/cognee

Length of output: 44


🏁 Script executed:

# Verify the actual usage pattern - if the code works, Row objects must support attribute access
# Check if there are any integration tests that use prune_system
rg "prune_graph_databases\|prune_vector_databases" --type=py cognee/tests/

Repository: topoteretes/cognee

Length of output: 44


🏁 Script executed:

# Final verification: Check SQLAlchemy Row documentation behavior in 2.0
# by looking at actual attribute access patterns in the codebase
rg "for .* in .*await.*\." -A 3 --type=py cognee/modules/data/ | head -30

Repository: topoteretes/cognee

Length of output: 44


🏁 Script executed:

# Check if Row objects from mappings() actually support attribute access
# by looking for any similar patterns where Row attributes are accessed
rg "dataset_databases\[.*\]\." --type=py cognee/

Repository: topoteretes/cognee

Length of output: 44


🏁 Script executed:

# Verify by checking actual usage in context_global_variables where similar access happens
cat -n cognee/context_global_variables.py | sed -n '1,80p'

Repository: topoteretes/cognee

Length of output: 5277


🏁 Script executed:

# Final check: SQLAlchemy 2.0+ Row objects support both dict and attribute access
# Let's verify this by checking the actual Row implementation usage
rg "from sqlalchemy" cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py | head -10

Repository: topoteretes/cognee

Length of output: 309


🏁 Script executed:

# Check if there's any Row wrapping or conversion happening
rg "class.*Row|def.*mappings" --type=py cognee/infrastructure/databases/

Repository: topoteretes/cognee

Length of output: 44


🏁 Script executed:

# Verify the complete picture - check what happens in a similar function that works
cat -n cognee/infrastructure/databases/utils/get_or_create_dataset_database.py | head -50

Repository: topoteretes/cognee

Length of output: 2232


🏁 Script executed:

# Let's verify SQLAlchemy Row attribute access by examining the actual imports and checking documentation
# SQLAlchemy 2.0+ Row objects support both dict and attribute access by default
python3 << 'EOF'
# Check SQLAlchemy 2.0 Row behavior documentation
# According to SQLAlchemy 2.0 docs, Row objects returned from mappings() support attribute access
# This is a documented feature of SQLAlchemy 2.0+

# The Row class in SQLAlchemy 2.0+ implements both:
# - __getitem__ for dict-like access: row["column_name"]
# - __getattr__ for attribute access: row.column_name

print("SQLAlchemy 2.0+ Row objects support BOTH dict-like and attribute access:")
print("- row['column_name'] works")
print("- row.column_name works")
print("\nThis is documented behavior in SQLAlchemy 2.0+")
print("See: https://docs.sqlalchemy.org/en/20/orm/queryguide.html#rows-are-namedtuples")
EOF

Repository: topoteretes/cognee

Length of output: 313


🏁 Script executed:

# Verify that the code actually works with Row attribute access by checking for any actual runtime errors
# or exception handling around this pattern
rg "AttributeError|except.*Error" cognee/modules/data/deletion/prune_system.py

Repository: topoteretes/cognee

Length of output: 250


Add docstring to prune_graph_databases function.

The function lacks documentation as required by project guidelines (code style guideline: "undocumented function definitions in the project's Python code are assumed incomplete").

🤖 Prompt for AI Agents
cognee/modules/data/deletion/prune_system.py lines 22-26: The function
prune_graph_databases is missing a docstring; add a concise triple-quoted
docstring immediately below the def line that states the function purpose
(prune/delete graph DBs for all dataset_database records), describes parameters
(if any, e.g., db_engine) and their types, notes return value (None) and side
effects (calls handler.delete_dataset for each dataset_database), and documents
possible exceptions or errors that may be raised; keep it short (2–4 sentences)
and follow project docstring style.

Comment on lines +25 to +26
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Consider validating handler dictionary structure.

The code accesses handler["handler_instance"] without verifying the key exists, which could raise a KeyError if the handler structure changes.

Consider adding defensive validation:

             handler = get_graph_dataset_database_handler(dataset_database)
+            if "handler_instance" not in handler:
+                logger.warning("Handler missing 'handler_instance' key for dataset_database")
+                continue
             await handler["handler_instance"].delete_dataset(dataset_database)

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In cognee/modules/data/deletion/prune_system.py around lines 25-26, the code
assumes handler["handler_instance"] exists which can raise KeyError; validate
that handler is a dict and contains the "handler_instance" key (and that its
value is not None and exposes delete_dataset) before calling await
handler["handler_instance"].delete_dataset(dataset_database); if validation
fails, log or raise a clear error with context (including dataset_database) or
skip gracefully to avoid unhandled exceptions.

except (OperationalError, EntityNotFoundError) as e:
logger.debug(
"Skipping pruning of graph DB. Error when accessing dataset_database table: %s",
Expand All @@ -38,22 +33,13 @@ async def _prune_graph_db(dataset_database: DatasetDatabase) -> dict:


async def prune_vector_databases():
async def _prune_vector_db(dataset_database: DatasetDatabase) -> dict:
from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
supported_dataset_database_handlers,
)

handler = supported_dataset_database_handlers[
dataset_database.vector_dataset_database_handler
]
return await handler["handler_instance"].delete_dataset(dataset_database)

db_engine = get_relational_engine()
try:
data = await db_engine.get_all_data_from_table("dataset_database")
dataset_databases = await db_engine.get_all_data_from_table("dataset_database")
# Go through each dataset database and delete the vector database
for data_item in data:
await _prune_vector_db(data_item)
for dataset_database in dataset_databases:
handler = get_vector_dataset_database_handler(dataset_database)
await handler["handler_instance"].delete_dataset(dataset_database)
except (OperationalError, EntityNotFoundError) as e:
logger.debug(
"Skipping pruning of vector DB. Error when accessing dataset_database table: %s",
Expand Down
26 changes: 26 additions & 0 deletions cognee/modules/data/methods/delete_dataset.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,34 @@
from cognee.modules.users.models import DatasetDatabase
from sqlalchemy import select

from cognee.modules.data.models import Dataset
from cognee.infrastructure.databases.utils.get_vector_dataset_database_handler import (
get_vector_dataset_database_handler,
)
from cognee.infrastructure.databases.utils.get_graph_dataset_database_handler import (
get_graph_dataset_database_handler,
)
from cognee.infrastructure.databases.relational import get_relational_engine


async def delete_dataset(dataset: Dataset):
    """Delete a dataset and tear down its per-dataset databases.

    If a ``DatasetDatabase`` record exists for the dataset, the graph and
    vector handlers are asked to delete their stores first; the dataset row
    itself is then removed from the relational database.
    """
    relational_engine = get_relational_engine()

    async with relational_engine.get_async_session() as session:
        record: DatasetDatabase = await session.scalar(
            select(DatasetDatabase).where(DatasetDatabase.dataset_id == dataset.id)
        )
        if record:
            graph_handler = get_graph_dataset_database_handler(record)
            await graph_handler["handler_instance"].delete_dataset(record)
            vector_handler = get_vector_dataset_database_handler(record)
            await vector_handler["handler_instance"].delete_dataset(record)

    # TODO: Also remove this dataset from pipeline_run_status on the Data
    # objects related to it. Until then, recreating a dataset with the same
    # name and data after deletion is skipped, because the old run is still
    # marked as completed even though the dataset is empty.
    return await relational_engine.delete_entity_by_id(dataset.__tablename__, dataset.id)
76 changes: 76 additions & 0 deletions cognee/tests/test_dataset_delete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import os
import asyncio
import pathlib
from uuid import UUID

import cognee
from cognee.shared.logging_utils import setup_logging, ERROR
from cognee.modules.data.methods.delete_dataset import delete_dataset
from cognee.modules.data.methods.get_dataset import get_dataset
from cognee.modules.users.methods import get_default_user


async def main():
    """End-to-end check that deleting a dataset removes its databases.

    Ingests two small datasets, runs cognify, then deletes each resulting
    dataset and asserts that its on-disk graph (.pkl) and vector (.lance.db)
    database files disappear.
    """
    tests_dir = pathlib.Path(__file__).parent

    # Point cognee's data and system roots at test-local directories.
    data_directory_path = str(
        pathlib.Path(os.path.join(tests_dir, ".data_storage/test_dataset_delete")).resolve()
    )
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(
        pathlib.Path(os.path.join(tests_dir, ".cognee_system/test_dataset_delete")).resolve()
    )
    cognee.config.system_root_directory(cognee_directory_path)

    # Start from a clean slate: wipe both data and system state.
    print("Resetting cognee data...")
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    print("Data reset complete.\n")

    # The knowledge graph is built from these two small datasets.
    text = """
    Natural language processing (NLP) is an interdisciplinary
    subfield of computer science and information retrieval.
    """
    await cognee.add(text, "nlp_dataset")
    await cognee.add("Quantum computing is the study of quantum computers.", "quantum_dataset")

    ret_val = await cognee.cognify()
    user = await get_default_user()

    for val in ret_val:
        dataset_id = str(val)
        databases_dir = os.path.join(cognee_directory_path, "databases", str(user.id))
        vector_db_path = os.path.join(databases_dir, dataset_id + ".lance.db")
        graph_db_path = os.path.join(databases_dir, dataset_id + ".pkl")

        # Both database files must exist before we attempt deletion.
        assert os.path.exists(graph_db_path), "Graph database file not found."
        assert os.path.exists(vector_db_path), "Vector database file not found."

        dataset = await get_dataset(user_id=user.id, dataset_id=UUID(dataset_id))
        await delete_dataset(dataset)

        # Deleting the dataset must remove both database files.
        assert not os.path.exists(graph_db_path), "Graph database file found."
        assert not os.path.exists(vector_db_path), "Vector database file found."


if __name__ == "__main__":
    logger = setup_logging(log_level=ERROR)
    # asyncio.run creates a fresh event loop, runs main(), shuts down async
    # generators, and closes the loop. The manual new_event_loop/
    # run_until_complete sequence it replaces never closed the loop,
    # leaking it on exit.
    asyncio.run(main())
Loading