From 91666847190c5f3e6cefd2999f3a77806cb18d1e Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 16 Jun 2025 18:06:03 +0200 Subject: [PATCH 1/6] feat: Add alembic migration for permissions --- .../ab7e313804ae_permission_system_rework.py | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 alembic/versions/ab7e313804ae_permission_system_rework.py diff --git a/alembic/versions/ab7e313804ae_permission_system_rework.py b/alembic/versions/ab7e313804ae_permission_system_rework.py new file mode 100644 index 0000000000..5cb219dca2 --- /dev/null +++ b/alembic/versions/ab7e313804ae_permission_system_rework.py @@ -0,0 +1,97 @@ +"""permission_system_rework + +Revision ID: ab7e313804ae +Revises: 1d0bb7fede17 +Create Date: 2025-06-16 15:20:43.118246 + +""" + +from typing import Sequence, Union +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from datetime import datetime, timezone +from uuid import uuid4 + +# revision identifiers, used by Alembic. +revision: str = "ab7e313804ae" +down_revision: Union[str, None] = "1d0bb7fede17" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _add_dataset_permission(conn, user_id, dataset_id, permission_name): + from cognee.modules.users.models import Permission + + permission = conn.execute( + sa.select(Permission).filter(Permission.name == permission_name) + ).fetchone() + + if permission is None: + permission = Permission(name=permission_name) + + from cognee.modules.users.models import ACL + + op.bulk_insert( + ACL.__table__, + [ + { + "id": uuid4(), + "create_at": datetime.now(timezone.utc), + "updated_at": datetime.now(timezone.utc), + "principal_id": user_id, + "dataset_id": dataset_id, + "permission_id": permission.id, + } + ], + ) + + +def _uuid_type(): + """Return a UUID-compatible column type for the current dialect.""" + if op.get_bind().dialect.name == "postgresql": + return postgresql.UUID(as_uuid=True) + # SQLite (and others): fall back to CHAR(36) – application inserts uuid4() + return sa.CHAR(36) + + +def upgrade() -> None: + conn = op.get_bind() + + # Recreate ACLs table with default permissions set to datasets instead of documents + op.drop_table("acls") + + uuid_type = _uuid_type() + op.create_table( + "acls", + sa.Column("id", uuid_type, primary_key=True, nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("principal_id", uuid_type, sa.ForeignKey("principals.id"), nullable=True), + sa.Column("permission_id", uuid_type, sa.ForeignKey("permissions.id"), nullable=True), + sa.Column( + "dataset_id", + uuid_type, + sa.ForeignKey("datasets.id", ondelete="CASCADE"), + nullable=True, + ), + ) + + from cognee.modules.data.models import Dataset + + datasets = conn.execute(sa.select(Dataset)).fetchall() + + if not datasets: + return + + for dataset in datasets: + _add_dataset_permission(conn, dataset.owner_id, dataset.id, "read") + _add_dataset_permission(conn, dataset.owner_id, dataset.id, "write") + _add_dataset_permission(conn, dataset.owner_id, dataset.id, "share") + _add_dataset_permission(conn, dataset.owner_id, dataset.id, "delete") + + +def downgrade() -> None: + # op.drop_table('acls') + # op.create_table('acls') + pass From f6e7f71e4f75ef2bcf4c9eb1c8aaf3f7a321171a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 16 Jun 2025 22:11:10 +0200 Subject: [PATCH 2/6] refactor: User bulk insert --- .../ab7e313804ae_permission_system_rework.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/alembic/versions/ab7e313804ae_permission_system_rework.py b/alembic/versions/ab7e313804ae_permission_system_rework.py index 5cb219dca2..6ebf1f2597 100644 --- a/alembic/versions/ab7e313804ae_permission_system_rework.py +++ b/alembic/versions/ab7e313804ae_permission_system_rework.py @@ -20,7 +20,7 @@ depends_on: Union[str, Sequence[str], None] = None -def _add_dataset_permission(conn, user_id, dataset_id, permission_name): +def _create_dataset_permission(conn, user_id, dataset_id, permission_name) -> dict: from cognee.modules.users.models import Permission permission = conn.execute( @@ -30,21 +30,14 @@ def _add_dataset_permission(conn, user_id, dataset_id, permission_name): if permission is None: permission = Permission(name=permission_name) - from cognee.modules.users.models import ACL - - op.bulk_insert( - ACL.__table__, - [ - { - "id": uuid4(), - "create_at": datetime.now(timezone.utc), - "updated_at": datetime.now(timezone.utc), - "principal_id": user_id, - "dataset_id": dataset_id, - "permission_id": permission.id, - } - ], - ) + return { + "id": uuid4(), + "create_at": datetime.now(timezone.utc), + "updated_at": datetime.now(timezone.utc), + "principal_id": user_id, + "dataset_id": dataset_id, + "permission_id": permission.id, + } def _uuid_type(): @@ -84,11 +77,18 @@ def upgrade() -> None: if not datasets: return + acl_list = [] + for dataset in datasets: - _add_dataset_permission(conn, dataset.owner_id, dataset.id, "read") - _add_dataset_permission(conn, dataset.owner_id, dataset.id, "write") - _add_dataset_permission(conn, dataset.owner_id, dataset.id, "share") - _add_dataset_permission(conn, dataset.owner_id, dataset.id, "delete") + acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "read")) + acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "write")) + acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "share")) + acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "delete")) + + if acl_list: + from cognee.modules.users.models import ACL + + op.bulk_insert(ACL.__table__, acl_list) def downgrade() -> None: From 8a211280bc3bcf0c4eab8bb92efac612ada662cc Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 17 Jun 2025 12:57:59 +0200 Subject: [PATCH 3/6] feat: Add downgrade migration --- .../ab7e313804ae_permission_system_rework.py | 115 +++++++++++++----- 1 file changed, 86 insertions(+), 29 deletions(-) diff --git a/alembic/versions/ab7e313804ae_permission_system_rework.py b/alembic/versions/ab7e313804ae_permission_system_rework.py index 6ebf1f2597..c48c9a2610 100644 --- a/alembic/versions/ab7e313804ae_permission_system_rework.py +++ b/alembic/versions/ab7e313804ae_permission_system_rework.py @@ -9,7 +9,7 @@ from typing import Sequence, Union from alembic import op import sqlalchemy as sa -from sqlalchemy.dialects import postgresql +from sqlalchemy import UUID from datetime import datetime, timezone from uuid import uuid4 @@ -20,32 +20,60 @@ depends_on: Union[str, Sequence[str], None] = None -def _create_dataset_permission(conn, user_id, dataset_id, permission_name) -> dict: +def _now(): + return datetime.now(timezone.utc) + + +def _ensure_permission(conn, permission_name) -> str: + """ + Return the permission.id for the given name, creating the row if needed. + """ from cognee.modules.users.models import Permission - permission = conn.execute( - sa.select(Permission).filter(Permission.name == permission_name) - ).fetchone() + row = conn.execute(sa.select(Permission).filter(Permission.name == permission_name)).fetchone() + + if row is None: + permission_id = uuid4() + op.bulk_insert( + Permission.__table__, + [ + { + "id": permission_id, + "name": permission_name, + "created_at": _now(), + "updated_at": _now(), + } + ], + ) + return permission_id + + return row.id - if permission is None: - permission = Permission(name=permission_name) +def _build_acl_row(*, user_id, target_id, permission_id, target_col) -> dict: + """Create a dict with the correct column names for the ACL row.""" return { "id": uuid4(), - "create_at": datetime.now(timezone.utc), - "updated_at": datetime.now(timezone.utc), + "created_at": _now(), + "updated_at": _now(), "principal_id": user_id, - "dataset_id": dataset_id, - "permission_id": permission.id, + target_col: target_id, + "permission_id": permission_id, } -def _uuid_type(): - """Return a UUID-compatible column type for the current dialect.""" - if op.get_bind().dialect.name == "postgresql": - return postgresql.UUID(as_uuid=True) - # SQLite (and others): fall back to CHAR(36) – application inserts uuid4() - return sa.CHAR(36) +def _create_dataset_permission(conn, user_id, dataset_id, permission_name): + perm_id = _ensure_permission(conn, permission_name) + return _build_acl_row( + user_id=user_id, target_id=dataset_id, permission_id=perm_id, target_col="dataset_id" + ) + + +def _create_data_permission(conn, user_id, data_id, permission_name): + perm_id = _ensure_permission(conn, permission_name) + return _build_acl_row( + user_id=user_id, target_id=data_id, permission_id=perm_id, target_col="data_id" + ) def upgrade() -> None: @@ -54,17 +82,16 @@ def upgrade() -> None: # Recreate ACLs table with default permissions set to datasets instead of documents op.drop_table("acls") - uuid_type = _uuid_type() - op.create_table( + acls_table = op.create_table( "acls", - sa.Column("id", uuid_type, primary_key=True, nullable=False), + sa.Column("id", UUID, primary_key=True, nullable=False, default=uuid4), sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), - sa.Column("principal_id", uuid_type, sa.ForeignKey("principals.id"), nullable=True), - sa.Column("permission_id", uuid_type, sa.ForeignKey("permissions.id"), nullable=True), + sa.Column("principal_id", UUID, sa.ForeignKey("principals.id"), nullable=True), + sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id"), nullable=True), sa.Column( "dataset_id", - uuid_type, + UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE"), nullable=True, ), @@ -86,12 +113,42 @@ def upgrade() -> None: acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "delete")) if acl_list: - from cognee.modules.users.models import ACL - - op.bulk_insert(ACL.__table__, acl_list) + op.bulk_insert(acls_table, acl_list) def downgrade() -> None: - # op.drop_table('acls') - # op.create_table('acls') - pass + conn = op.get_bind() + + op.drop_table("acls") + + acls_table = op.create_table( + "acls", + sa.Column("id", UUID, primary_key=True, nullable=False, default=uuid4), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("principal_id", UUID, sa.ForeignKey("principals.id"), nullable=True), + sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id"), nullable=True), + sa.Column( + "data_id", + UUID, + sa.ForeignKey("data.id", ondelete="CASCADE"), + nullable=True, + ), + ) + + from cognee.modules.data.models import Data + + data = conn.execute(sa.select(Data)).fetchall() + + if not data: + return + + acl_list = [] + for single_data in data: + acl_list.append(_create_data_permission(conn, single_data.owner_id, single_data.id, "read")) + acl_list.append( + _create_data_permission(conn, single_data.owner_id, single_data.id, "write") + ) + + if acl_list: + op.bulk_insert(acls_table, acl_list) From facb303636f1a039f5349188c903070697f56147 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 17 Jun 2025 13:14:46 +0200 Subject: [PATCH 4/6] refactor: add comment --- alembic/versions/ab7e313804ae_permission_system_rework.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/alembic/versions/ab7e313804ae_permission_system_rework.py b/alembic/versions/ab7e313804ae_permission_system_rework.py index c48c9a2610..52d926fb79 100644 --- a/alembic/versions/ab7e313804ae_permission_system_rework.py +++ b/alembic/versions/ab7e313804ae_permission_system_rework.py @@ -34,6 +34,8 @@ def _ensure_permission(conn, permission_name) -> str: if row is None: permission_id = uuid4() + # TODO: The Permission table might change in future version of Cognee, + # if this happens this migration version won't work anymore. Not sure what to do op.bulk_insert( Permission.__table__, [ From fb1daf147998e62aac53d8c73754e08aeaa9daba Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 17 Jun 2025 14:29:48 +0200 Subject: [PATCH 5/6] refactor: Add permission table definition for point in time --- .../ab7e313804ae_permission_system_rework.py | 54 ++++++++++++------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/alembic/versions/ab7e313804ae_permission_system_rework.py b/alembic/versions/ab7e313804ae_permission_system_rework.py index 52d926fb79..5df4dff649 100644 --- a/alembic/versions/ab7e313804ae_permission_system_rework.py +++ b/alembic/versions/ab7e313804ae_permission_system_rework.py @@ -34,10 +34,26 @@ def _ensure_permission(conn, permission_name) -> str: if row is None: permission_id = uuid4() - # TODO: The Permission table might change in future version of Cognee, - # if this happens this migration version won't work anymore. Not sure what to do + + permissions_table = sa.Table( + "acls", + sa.MetaData(), + sa.Column("id", UUID, primary_key=True, default=uuid4), + sa.Column( + "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + onupdate=lambda: datetime.now(timezone.utc), + ), + sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")), + sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")), + sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")), + ) + op.bulk_insert( - Permission.__table__, + permissions_table, [ { "id": permission_id, @@ -86,17 +102,16 @@ def upgrade() -> None: acls_table = op.create_table( "acls", - sa.Column("id", UUID, primary_key=True, nullable=False, default=uuid4), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), - sa.Column("principal_id", UUID, sa.ForeignKey("principals.id"), nullable=True), - sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id"), nullable=True), + sa.Column("id", UUID, primary_key=True, default=uuid4), sa.Column( - "dataset_id", - UUID, - sa.ForeignKey("datasets.id", ondelete="CASCADE"), - nullable=True, + "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) ), + sa.Column( + "updated_at", sa.DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc) + ), + sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")), + sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")), + sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")), ) from cognee.modules.data.models import Dataset @@ -126,16 +141,15 @@ def downgrade() -> None: acls_table = op.create_table( "acls", sa.Column("id", UUID, primary_key=True, nullable=False, default=uuid4), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), - sa.Column("principal_id", UUID, sa.ForeignKey("principals.id"), nullable=True), - sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id"), nullable=True), sa.Column( - "data_id", - UUID, - sa.ForeignKey("data.id", ondelete="CASCADE"), - nullable=True, + "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) + ), + sa.Column( + "updated_at", sa.DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc) ), + sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")), + sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")), + sa.Column("data_id", UUID, sa.ForeignKey("data.id", ondelete="CASCADE")), ) from cognee.modules.data.models import Data From 03074f0faaef810dcf1e37d4cb113bcda091c691 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 17 Jun 2025 16:04:56 +0200 Subject: [PATCH 6/6] feat: Add migration that doesn't import cognee models --- .../ab7e313804ae_permission_system_rework.py | 108 +++++++++++++----- 1 file changed, 80 insertions(+), 28 deletions(-) diff --git a/alembic/versions/ab7e313804ae_permission_system_rework.py b/alembic/versions/ab7e313804ae_permission_system_rework.py index 5df4dff649..bd69b9b414 100644 --- a/alembic/versions/ab7e313804ae_permission_system_rework.py +++ b/alembic/versions/ab7e313804ae_permission_system_rework.py @@ -24,34 +24,86 @@ def _now(): return datetime.now(timezone.utc) +def _define_dataset_table() -> sa.Table: + # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table + # definition or load what is in the database + table = sa.Table( + "datasets", + sa.MetaData(), + sa.Column("id", UUID, primary_key=True, default=uuid4), + sa.Column("name", sa.Text), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + default=lambda: datetime.now(timezone.utc), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + onupdate=lambda: datetime.now(timezone.utc), + ), + sa.Column("owner_id", UUID, sa.ForeignKey("principals.id"), index=True), + ) + + return table + + +def _define_data_table() -> sa.Table: + # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table + # definition or load what is in the database + table = sa.Table( + "data", + sa.MetaData(), + sa.Column("id", UUID, primary_key=True, default=uuid4), + sa.Column("name", sa.String), + sa.Column("extension", sa.String), + sa.Column("mime_type", sa.String), + sa.Column("raw_data_location", sa.String), + sa.Column("owner_id", UUID, index=True), + sa.Column("content_hash", sa.String), + sa.Column("external_metadata", sa.JSON), + sa.Column("node_set", sa.JSON, nullable=True), # list of strings + sa.Column("token_count", sa.Integer), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + default=lambda: datetime.now(timezone.utc), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + onupdate=lambda: datetime.now(timezone.utc), + ), + ) + + return table + + def _ensure_permission(conn, permission_name) -> str: """ Return the permission.id for the given name, creating the row if needed. """ - from cognee.modules.users.models import Permission - - row = conn.execute(sa.select(Permission).filter(Permission.name == permission_name)).fetchone() + permissions_table = sa.Table( + "permissions", + sa.MetaData(), + sa.Column("id", UUID, primary_key=True, index=True, default=uuid4), + sa.Column( + "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + onupdate=lambda: datetime.now(timezone.utc), + ), + sa.Column("name", sa.String, unique=True, nullable=False, index=True), + ) + row = conn.execute( + sa.select(permissions_table).filter(permissions_table.c.name == permission_name) + ).fetchone() if row is None: permission_id = uuid4() - permissions_table = sa.Table( - "acls", - sa.MetaData(), - sa.Column("id", UUID, primary_key=True, default=uuid4), - sa.Column( - "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) - ), - sa.Column( - "updated_at", - sa.DateTime(timezone=True), - onupdate=lambda: datetime.now(timezone.utc), - ), - sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")), - sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")), - sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")), - ) - op.bulk_insert( permissions_table, [ @@ -59,7 +111,6 @@ def _ensure_permission(conn, permission_name) -> str: "id": permission_id, "name": permission_name, "created_at": _now(), - "updated_at": _now(), } ], ) @@ -73,7 +124,6 @@ def _build_acl_row(*, user_id, target_id, permission_id, target_col) -> dict: return { "id": uuid4(), "created_at": _now(), - "updated_at": _now(), "principal_id": user_id, target_col: target_id, "permission_id": permission_id, @@ -114,9 +164,10 @@ def upgrade() -> None: sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")), ) - from cognee.modules.data.models import Dataset - - datasets = conn.execute(sa.select(Dataset)).fetchall() + # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table + # definition or load what is in the database + dataset_table = _define_dataset_table() + datasets = conn.execute(sa.select(dataset_table)).fetchall() if not datasets: return @@ -152,9 +203,10 @@ def downgrade() -> None: sa.Column("data_id", UUID, sa.ForeignKey("data.id", ondelete="CASCADE")), ) - from cognee.modules.data.models import Data - - data = conn.execute(sa.select(Data)).fetchall() + # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table + # definition or load what is in the database + data_table = _define_data_table() + data = conn.execute(sa.select(data_table)).fetchall() if not data: return