From 672b440071b6c116119fd4b1ab816e032056fc9e Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Fri, 19 Sep 2025 10:33:18 -0700 Subject: [PATCH 001/173] fix: traverse target snapshots instead of all snapshots (#5416) --- sqlmesh/core/snapshot/evaluator.py | 2 +- tests/core/test_snapshot_evaluator.py | 57 +++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/sqlmesh/core/snapshot/evaluator.py b/sqlmesh/core/snapshot/evaluator.py index 658bb1c400..86fa897005 100644 --- a/sqlmesh/core/snapshot/evaluator.py +++ b/sqlmesh/core/snapshot/evaluator.py @@ -494,7 +494,7 @@ def migrate( with self.concurrent_context(): # Only migrate snapshots for which there's an existing data object concurrent_apply_to_snapshots( - snapshots_by_name.values(), + target_snapshots, lambda s: self._migrate_snapshot( s, snapshots_by_name, diff --git a/tests/core/test_snapshot_evaluator.py b/tests/core/test_snapshot_evaluator.py index 1c3d1e6adc..5e7b078787 100644 --- a/tests/core/test_snapshot_evaluator.py +++ b/tests/core/test_snapshot_evaluator.py @@ -3955,6 +3955,63 @@ def test_migrate_snapshot(snapshot: Snapshot, mocker: MockerFixture, adapter_moc ) +def test_migrate_only_processes_target_snapshots( + mocker: MockerFixture, adapter_mock, make_snapshot +): + evaluator = SnapshotEvaluator(adapter_mock) + + target_model = SqlModel( + name="test_schema.target_model", + kind=FullKind(), + query=parse_one("SELECT 1 AS a"), + ) + extra_model = SqlModel( + name="test_schema.extra_model", + kind=FullKind(), + query=parse_one("SELECT 1 AS a"), + ) + + target_snapshot = make_snapshot(target_model) + extra_snapshot = make_snapshot(extra_model) + target_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + extra_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + target_snapshots = [target_snapshot] + snapshots = { + target_snapshot.snapshot_id: target_snapshot, + extra_snapshot.snapshot_id: extra_snapshot, + } + + mocker.patch.object( + evaluator, + "_get_data_objects", + return_value={target_snapshot.snapshot_id: mocker.Mock()}, + ) + migrate_mock = mocker.patch.object(evaluator, "_migrate_snapshot") + + def apply_side_effect(snapshot_iterable, fn, *_args, **_kwargs): + for snapshot in snapshot_iterable: + fn(snapshot) + return ([], []) + + apply_mock = mocker.patch( + "sqlmesh.core.snapshot.evaluator.concurrent_apply_to_snapshots", + side_effect=apply_side_effect, + ) + + evaluator.migrate(target_snapshots=target_snapshots, snapshots=snapshots) + + assert apply_mock.call_count == 1 + called_snapshots = list(apply_mock.call_args.args[0]) + assert called_snapshots == target_snapshots + + migrate_mock.assert_called_once() + called_snapshot, snapshots_by_name, *_ = migrate_mock.call_args.args + assert called_snapshot is target_snapshot + assert target_snapshot.name in snapshots_by_name + assert extra_snapshot.name in snapshots_by_name + + def test_migrate_managed(adapter_mock, make_snapshot, mocker: MockerFixture): evaluator = SnapshotEvaluator(adapter_mock) From 50aee2cc8f3d3d356ddeff0138470c9984d17167 Mon Sep 17 00:00:00 2001 From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com> Date: Fri, 19 Sep 2025 21:18:26 +0300 Subject: [PATCH 002/173] Fix: Pass a copy of the properties to avoid mutation of actual dict (#5417) --- sqlmesh/core/snapshot/evaluator.py | 4 +- tests/core/test_snapshot_evaluator.py | 76 +++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) diff --git 
a/sqlmesh/core/snapshot/evaluator.py b/sqlmesh/core/snapshot/evaluator.py
index 86fa897005..688f9d8d5b 100644
--- a/sqlmesh/core/snapshot/evaluator.py
+++ b/sqlmesh/core/snapshot/evaluator.py
@@ -763,7 +763,7 @@ def _evaluate_snapshot(
             snapshots=snapshots,
             deployability_index=deployability_index,
             render_kwargs=create_render_kwargs,
-            rendered_physical_properties=rendered_physical_properties,
+            rendered_physical_properties=rendered_physical_properties.copy(),
             allow_destructive_snapshots=allow_destructive_snapshots,
             allow_additive_snapshots=allow_additive_snapshots,
         )
@@ -776,7 +776,7 @@
             is_table_deployable=is_snapshot_deployable,
             deployability_index=deployability_index,
             create_render_kwargs=create_render_kwargs,
-            rendered_physical_properties=rendered_physical_properties,
+            rendered_physical_properties=rendered_physical_properties.copy(),
             dry_run=False,
             run_pre_post_statements=False,
         )
diff --git a/tests/core/test_snapshot_evaluator.py b/tests/core/test_snapshot_evaluator.py
index 5e7b078787..66128cfeee 100644
--- a/tests/core/test_snapshot_evaluator.py
+++ b/tests/core/test_snapshot_evaluator.py
@@ -4737,3 +4737,79 @@ def test_wap_publish_failure(adapter_mock: Mock, make_snapshot: t.Callable[...,
     # Execute audit with WAP ID and expect it to raise the exception
     with pytest.raises(Exception, match="WAP publish failed"):
         evaluator.audit(snapshot, snapshots={}, wap_id=wap_id)
+
+
+def test_properties_are_preserved_in_both_create_statements(
+    adapter_mock: Mock, make_snapshot: t.Callable[..., Snapshot]
+) -> None:
+    # the below mocks are needed to create a situation
+    # where we trigger two create statements during evaluation
+    transaction_mock = Mock()
+    transaction_mock.__enter__ = Mock()
+    transaction_mock.__exit__ = Mock()
+    session_mock = Mock()
+    session_mock.__enter__ = Mock()
+    session_mock.__exit__ = Mock()
+    adapter_mock = Mock()
+    adapter_mock.transaction.return_value = transaction_mock
+    adapter_mock.session.return_value = session_mock
+    adapter_mock.dialect = "trino"
+    adapter_mock.HAS_VIEW_BINDING = False
+    adapter_mock.wap_supported.return_value = False
+    adapter_mock.get_data_objects.return_value = []
+    adapter_mock.with_settings.return_value = adapter_mock
+    adapter_mock.table_exists.return_value = False
+
+    props = []
+
+    def mutate_view_properties(*args, **kwargs):
+        view_props = kwargs.get("view_properties")
+        if isinstance(view_props, dict):
+            props.append(view_props["creatable_type"].sql())
+            # simulate the adapter mutating the shared dict by popping a property
+            view_props.pop("creatable_type")
+        return None
+
+    adapter_mock.create_view.side_effect = mutate_view_properties
+
+    evaluator = SnapshotEvaluator(adapter_mock)
+
+    # create a view model with a SECURITY INVOKER physical property
+    # AND self-referential to trigger two create statements
+    model = load_sql_based_model(
+        parse(  # type: ignore
+            """
+            MODEL (
+                name test_schema.security_view,
+                kind VIEW,
+                physical_properties (
+                    'creatable_type' = 'SECURITY INVOKER'
+                )
+            );
+
+            SELECT 1 as col from test_schema.security_view;
+            """
+        ),
+    )
+
+    snapshot = make_snapshot(model)
+    snapshot.categorize_as(SnapshotChangeCategory.BREAKING)
+    evaluator.evaluate(
+        snapshot,
+        start="2024-01-01",
+        end="2024-01-02",
+        execution_time="2024-01-02",
+        snapshots={},
+    )
+
+    # Verify create_view was called twice
+    assert adapter_mock.create_view.call_count == 2
+    first_call = adapter_mock.create_view.call_args_list[0]
+    second_call = adapter_mock.create_view.call_args_list[1]
+
+    # First call should be CREATE VIEW (replace=False), the second CREATE OR REPLACE 
VIEW (replace=True) + assert first_call.kwargs.get("replace") == False + assert second_call.kwargs.get("replace") == True + + # Both calls should have view_properties with security invoker + assert props == ["'SECURITY INVOKER'", "'SECURITY INVOKER'"] From d41c3e0e3a6c010babf75a9b46795834a0c56730 Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Fri, 19 Sep 2025 15:06:50 -0700 Subject: [PATCH 003/173] Fix: Disable WAP support by default for Spark and Iceberg (#5415) --- sqlmesh/core/config/connection.py | 5 +++++ sqlmesh/core/engine_adapter/base.py | 5 +++++ sqlmesh/core/engine_adapter/spark.py | 14 +++++++------ sqlmesh/core/snapshot/evaluator.py | 1 + tests/core/engine_adapter/test_spark.py | 28 +++++++++++++++++-------- 5 files changed, 38 insertions(+), 15 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 553ffd58a5..dbda66614e 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1755,6 +1755,7 @@ class SparkConnectionConfig(ConnectionConfig): config_dir: t.Optional[str] = None catalog: t.Optional[str] = None config: t.Dict[str, t.Any] = {} + wap_enabled: bool = False concurrent_tasks: int = 4 register_comments: bool = True @@ -1801,6 +1802,10 @@ def _static_connection_kwargs(self) -> t.Dict[str, t.Any]: .getOrCreate(), } + @property + def _extra_engine_config(self) -> t.Dict[str, t.Any]: + return {"wap_enabled": self.wap_enabled} + class TrinoAuthenticationMethod(str, Enum): NO_AUTH = "no-auth" diff --git a/sqlmesh/core/engine_adapter/base.py b/sqlmesh/core/engine_adapter/base.py index d8747c979d..47e6a4260c 100644 --- a/sqlmesh/core/engine_adapter/base.py +++ b/sqlmesh/core/engine_adapter/base.py @@ -2357,6 +2357,11 @@ def fetch_pyspark_df( """Fetches a PySpark DataFrame from the cursor""" raise NotImplementedError(f"Engine does not support PySpark DataFrames: {type(self)}") + @property + def wap_enabled(self) -> bool: + """Returns whether WAP is enabled for this engine.""" + return self._extra_config.get("wap_enabled", False) + def wap_supported(self, table_name: TableName) -> bool: """Returns whether WAP for the target table is supported.""" return False diff --git a/sqlmesh/core/engine_adapter/spark.py b/sqlmesh/core/engine_adapter/spark.py index 7d6a4d969b..18ba6ea106 100644 --- a/sqlmesh/core/engine_adapter/spark.py +++ b/sqlmesh/core/engine_adapter/spark.py @@ -457,12 +457,14 @@ def _create_table( if wap_id.startswith(f"{self.BRANCH_PREFIX}{self.WAP_PREFIX}"): table_name.set("this", table_name.this.this) - wap_supported = ( - kwargs.get("storage_format") or "" - ).lower() == "iceberg" or self.wap_supported(table_name) - do_dummy_insert = ( - False if not wap_supported or not exists else not self.table_exists(table_name) - ) + do_dummy_insert = False + if self.wap_enabled: + wap_supported = ( + kwargs.get("storage_format") or "" + ).lower() == "iceberg" or self.wap_supported(table_name) + do_dummy_insert = ( + False if not wap_supported or not exists else not self.table_exists(table_name) + ) super()._create_table( table_name_or_schema, expression, diff --git a/sqlmesh/core/snapshot/evaluator.py b/sqlmesh/core/snapshot/evaluator.py index 688f9d8d5b..e22b1a850b 100644 --- a/sqlmesh/core/snapshot/evaluator.py +++ b/sqlmesh/core/snapshot/evaluator.py @@ -793,6 +793,7 @@ def _evaluate_snapshot( if ( snapshot.is_materialized and target_table_exists + and adapter.wap_enabled and (model.wap_supported or adapter.wap_supported(target_table_name)) ): wap_id = random_id()[0:8] diff --git 
a/tests/core/engine_adapter/test_spark.py b/tests/core/engine_adapter/test_spark.py index f1929639a2..bc4e352bd7 100644 --- a/tests/core/engine_adapter/test_spark.py +++ b/tests/core/engine_adapter/test_spark.py @@ -66,14 +66,15 @@ def test_create_table_properties(make_mocked_engine_adapter: t.Callable): ) +@pytest.mark.parametrize("wap_enabled", [True, False]) def test_replace_query_table_properties_not_exists( - mocker: MockerFixture, make_mocked_engine_adapter: t.Callable + mocker: MockerFixture, make_mocked_engine_adapter: t.Callable, wap_enabled: bool ): mocker.patch( "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter.table_exists", return_value=False, ) - adapter = make_mocked_engine_adapter(SparkEngineAdapter) + adapter = make_mocked_engine_adapter(SparkEngineAdapter, wap_enabled=wap_enabled) columns_to_types = { "cola": exp.DataType.build("INT"), @@ -89,10 +90,13 @@ def test_replace_query_table_properties_not_exists( table_properties={"a": exp.convert(1)}, ) - assert to_sql_calls(adapter) == [ + expected_sql_calls = [ "CREATE TABLE IF NOT EXISTS `test_table` USING ICEBERG PARTITIONED BY (`colb`) TBLPROPERTIES ('a'=1) AS SELECT CAST(`cola` AS INT) AS `cola`, CAST(`colb` AS STRING) AS `colb`, CAST(`colc` AS STRING) AS `colc` FROM (SELECT 1 AS `cola`, '2' AS `colb`, '3' AS `colc`) AS `_subquery`", - "INSERT INTO `test_table` SELECT * FROM `test_table`", ] + if wap_enabled: + expected_sql_calls.append("INSERT INTO `test_table` SELECT * FROM `test_table`") + + assert to_sql_calls(adapter) == expected_sql_calls def test_replace_query_table_properties_exists( @@ -825,13 +829,16 @@ def test_wap_publish(make_mocked_engine_adapter: t.Callable, mocker: MockerFixtu ) -def test_create_table_iceberg(mocker: MockerFixture, make_mocked_engine_adapter: t.Callable): +@pytest.mark.parametrize("wap_enabled", [True, False]) +def test_create_table_iceberg( + mocker: MockerFixture, make_mocked_engine_adapter: t.Callable, wap_enabled: bool +): mocker.patch( "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter.table_exists", return_value=False, ) - adapter = make_mocked_engine_adapter(SparkEngineAdapter) + adapter = make_mocked_engine_adapter(SparkEngineAdapter, wap_enabled=wap_enabled) columns_to_types = { "cola": exp.DataType.build("INT"), @@ -846,10 +853,13 @@ def test_create_table_iceberg(mocker: MockerFixture, make_mocked_engine_adapter: storage_format="ICEBERG", ) - assert to_sql_calls(adapter) == [ + expected_sql_calls = [ "CREATE TABLE IF NOT EXISTS `test_table` (`cola` INT, `colb` STRING, `colc` STRING) USING ICEBERG PARTITIONED BY (`colb`)", - "INSERT INTO `test_table` SELECT * FROM `test_table`", ] + if wap_enabled: + expected_sql_calls.append("INSERT INTO `test_table` SELECT * FROM `test_table`") + + assert to_sql_calls(adapter) == expected_sql_calls def test_comments_hive(mocker: MockerFixture, make_mocked_engine_adapter: t.Callable): @@ -973,7 +983,7 @@ def test_create_table_with_wap(make_mocked_engine_adapter: t.Callable, mocker: M "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter.table_exists", return_value=False, ) - adapter = make_mocked_engine_adapter(SparkEngineAdapter) + adapter = make_mocked_engine_adapter(SparkEngineAdapter, wap_enabled=True) adapter.create_table( "catalog.schema.table.branch_wap_12345", From 6d00e35ca0dd0e9f85a675a3136b3a52f9b551dc Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Mon, 22 Sep 2025 15:58:06 -0700 Subject: [PATCH 004/173] Fix: Reporting deletion of physical tables for snapshots of symbolic / audit models (#5422) --- 
sqlmesh/core/snapshot/evaluator.py | 4 ++- tests/core/test_snapshot_evaluator.py | 39 ++++++++++++++++++++++- tests/integrations/jupyter/test_magics.py | 3 -- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/sqlmesh/core/snapshot/evaluator.py b/sqlmesh/core/snapshot/evaluator.py index e22b1a850b..baf4dd67f1 100644 --- a/sqlmesh/core/snapshot/evaluator.py +++ b/sqlmesh/core/snapshot/evaluator.py @@ -518,10 +518,12 @@ def cleanup( target_snapshots: Snapshots to cleanup. on_complete: A callback to call on each successfully deleted database object. """ + target_snapshots = [ + t for t in target_snapshots if t.snapshot.is_model and not t.snapshot.is_symbolic + ] snapshots_to_dev_table_only = { t.snapshot.snapshot_id: t.dev_table_only for t in target_snapshots } - with self.concurrent_context(): concurrent_apply_to_snapshots( [t.snapshot for t in target_snapshots], diff --git a/tests/core/test_snapshot_evaluator.py b/tests/core/test_snapshot_evaluator.py index 66128cfeee..2df91afb10 100644 --- a/tests/core/test_snapshot_evaluator.py +++ b/tests/core/test_snapshot_evaluator.py @@ -436,10 +436,14 @@ def create_and_cleanup(name: str, dev_table_only: bool): snapshot.categorize_as(SnapshotChangeCategory.BREAKING, forward_only=True) snapshot.version = "test_version" + on_cleanup_mock = mocker.Mock() + evaluator.promote([snapshot], EnvironmentNamingInfo(name="test_env")) evaluator.cleanup( - [SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=dev_table_only)] + [SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=dev_table_only)], + on_complete=on_cleanup_mock, ) + assert on_cleanup_mock.call_count == 1 if dev_table_only else 2 return snapshot snapshot = create_and_cleanup("catalog.test_schema.test_model", True) @@ -611,6 +615,39 @@ def create_and_cleanup_external_model(name: str, dev_table_only: bool): adapter_mock.drop_table.assert_not_called() +def test_cleanup_symbolic_and_audit_snapshots_no_callback( + mocker: MockerFixture, adapter_mock, make_snapshot +): + evaluator = SnapshotEvaluator(adapter_mock) + on_complete_mock = mocker.Mock() + + # Test external model + external_model = ExternalModel( + name="test_schema.external_model", + kind=ExternalKind(), + ) + external_snapshot = make_snapshot(external_model) + external_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + # Test standalone audit + audit = StandaloneAudit(name="test_audit", query=parse_one("SELECT NULL LIMIT 0")) + audit_snapshot = make_snapshot(audit) + audit_snapshot.categorize_as(SnapshotChangeCategory.NON_BREAKING) + + evaluator.cleanup( + [ + SnapshotTableCleanupTask(snapshot=external_snapshot.table_info, dev_table_only=False), + SnapshotTableCleanupTask(snapshot=audit_snapshot.table_info, dev_table_only=False), + ], + on_complete=on_complete_mock, + ) + + # Verify that no physical tables were attempted to be dropped + adapter_mock.drop_table.assert_not_called() + adapter_mock.get_data_object.assert_not_called() + on_complete_mock.assert_not_called() + + @pytest.mark.parametrize("view_exists", [True, False]) def test_evaluate_materialized_view( mocker: MockerFixture, adapter_mock, make_snapshot, view_exists: bool diff --git a/tests/integrations/jupyter/test_magics.py b/tests/integrations/jupyter/test_magics.py index 0a39c155cf..991df8fc15 100644 --- a/tests/integrations/jupyter/test_magics.py +++ b/tests/integrations/jupyter/test_magics.py @@ -906,9 +906,6 @@ def test_destroy( "Are you ABSOLUTELY SURE you want to proceed with deletion? 
[y/n]:", "Environment 'prod' invalidated.", "Deleted object memory.sushi", - 'Deleted object "memory"."raw"."model1"', - 'Deleted object "memory"."raw"."model2"', - 'Deleted object "memory"."raw"."demographics"', "State tables removed.", "Destroy completed successfully.", ] From 012e5426befbc9ddb8aabde6332a8da1a30338b9 Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Mon, 22 Sep 2025 16:32:31 -0700 Subject: [PATCH 005/173] fix: lowercase column names snowflake (#5425) --- sqlmesh/core/engine_adapter/snowflake.py | 3 +- tests/core/engine_adapter/test_snowflake.py | 35 ++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/sqlmesh/core/engine_adapter/snowflake.py b/sqlmesh/core/engine_adapter/snowflake.py index c6b0e71ac3..355fb9719c 100644 --- a/sqlmesh/core/engine_adapter/snowflake.py +++ b/sqlmesh/core/engine_adapter/snowflake.py @@ -526,7 +526,8 @@ def _get_data_objects( type=DataObjectType.from_str(row.type), # type: ignore clustering_key=row.clustering_key, # type: ignore ) - for row in df.itertuples() + # lowercase the column names for cases where Snowflake might return uppercase column names for certain catalogs + for row in df.rename(columns={col: col.lower() for col in df.columns}).itertuples() ] def set_current_catalog(self, catalog: str) -> None: diff --git a/tests/core/engine_adapter/test_snowflake.py b/tests/core/engine_adapter/test_snowflake.py index 75ce8edbe0..62c4a4f3eb 100644 --- a/tests/core/engine_adapter/test_snowflake.py +++ b/tests/core/engine_adapter/test_snowflake.py @@ -7,9 +7,10 @@ import sqlmesh.core.dialect as d from sqlmesh.core.dialect import normalize_model_name +from sqlmesh.core.engine_adapter import SnowflakeEngineAdapter from sqlmesh.core.engine_adapter.base import EngineAdapter +from sqlmesh.core.engine_adapter.shared import DataObjectType from sqlmesh.core.model import load_sql_based_model -from sqlmesh.core.engine_adapter import SnowflakeEngineAdapter from sqlmesh.core.model.definition import SqlModel from sqlmesh.core.node import IntervalUnit from sqlmesh.utils.errors import SQLMeshError @@ -39,6 +40,38 @@ def test_get_temp_table(mocker: MockerFixture, make_mocked_engine_adapter: t.Cal assert value.sql(dialect=adapter.dialect) == '"CATALOG"."DB"."__temp_TEST_TABLE_abcdefgh"' +def test_get_data_objects_lowercases_columns( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +) -> None: + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter, patch_get_data_objects=False) + + adapter.get_current_catalog = mocker.Mock(return_value="TEST_CATALOG") + + adapter.fetchdf = mocker.Mock( + return_value=pd.DataFrame( # type: ignore[assignment] + [ + { + "CATALOG": "TEST_CATALOG", + "NAME": "MY_TABLE", + "SCHEMA_NAME": "PUBLIC", + "TYPE": "TABLE", + "CLUSTERING_KEY": "ID", + } + ] + ) + ) + + data_objects = adapter._get_data_objects("TEST_CATALOG.PUBLIC") + + assert len(data_objects) == 1 + data_object = data_objects[0] + assert data_object.catalog == "TEST_CATALOG" + assert data_object.schema_name == "PUBLIC" + assert data_object.name == "MY_TABLE" + assert data_object.type == DataObjectType.TABLE + assert data_object.clustering_key == "ID" + + @pytest.mark.parametrize( "current_warehouse, current_warehouse_exp, configured_warehouse, configured_warehouse_exp, should_change", [ From bf40bbeafc07723d76d64da44c635387cda57b62 Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Mon, 22 Sep 2025 19:18:36 -0700 
Subject: [PATCH 006/173] chore: fix dbt ci tests (#5426) --- Makefile | 11 ++++++++++- pyproject.toml | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 96305c4bfb..40874f7972 100644 --- a/Makefile +++ b/Makefile @@ -36,11 +36,16 @@ install-dev-dbt-%: if [ "$$version" = "1.10.0" ]; then \ echo "Applying special handling for dbt 1.10.0"; \ $(SED_INPLACE) -E 's/"(dbt-core)[^"]*"/"\1~='"$$version"'"/g' pyproject.toml; \ - $(SED_INPLACE) -E 's/"(dbt-(bigquery|duckdb|snowflake|athena-community|clickhouse|databricks|redshift|trino))[^"]*"/"\1"/g' pyproject.toml; \ + $(SED_INPLACE) -E 's/"(dbt-(bigquery|duckdb|snowflake|athena-community|clickhouse|redshift|trino))[^"]*"/"\1"/g' pyproject.toml; \ + $(SED_INPLACE) -E 's/"(dbt-databricks)[^"]*"/"\1~='"$$version"'"/g' pyproject.toml; \ else \ echo "Applying version $$version to all dbt packages"; \ $(SED_INPLACE) -E 's/"(dbt-[^"><=~!]+)[^"]*"/"\1~='"$$version"'"/g' pyproject.toml; \ fi; \ + if printf '%s\n' "$$version" | awk -F. '{ if ($$1 == 1 && (($$2 >= 3 && $$2 <= 5) || $$2 == 10)) exit 0; exit 1 }'; then \ + echo "Applying numpy<2 constraint for dbt $$version"; \ + $(SED_INPLACE) 's/"numpy"/"numpy<2"/g' pyproject.toml; \ + fi; \ $(MAKE) install-dev; \ if [ "$$version" = "1.6.0" ]; then \ echo "Applying overrides for dbt 1.6.0"; \ @@ -50,6 +55,10 @@ install-dev-dbt-%: echo "Applying overrides for dbt 1.7.0"; \ $(PIP) install 'databricks-sdk==0.28.0' --reinstall; \ fi; \ + if [ "$$version" = "1.5.0" ]; then \ + echo "Applying overrides for dbt 1.5.0"; \ + $(PIP) install 'dbt-databricks==1.5.6' 'numpy<2' --reinstall; \ + fi; \ mv pyproject.toml.backup pyproject.toml; \ echo "Restored original pyproject.toml" diff --git a/pyproject.toml b/pyproject.toml index bd34906434..1d34b340b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,7 @@ dev = [ "google-cloud-bigquery-storage", "httpx", "mypy~=1.13.0", + "numpy", "pandas-stubs", "pre-commit", "psycopg2-binary", From e00e86064995841f097b62c9d32b6e99ed2a0aba Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Mon, 22 Sep 2025 19:35:26 -0700 Subject: [PATCH 007/173] chore: fix tests to include dataframe (#5423) --- sqlmesh/core/engine_adapter/clickhouse.py | 3 ++- tests/core/engine_adapter/integration/conftest.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sqlmesh/core/engine_adapter/clickhouse.py b/sqlmesh/core/engine_adapter/clickhouse.py index ccffe64118..84d6ad311e 100644 --- a/sqlmesh/core/engine_adapter/clickhouse.py +++ b/sqlmesh/core/engine_adapter/clickhouse.py @@ -112,8 +112,9 @@ def query_factory() -> Query: storage_format=exp.var("MergeTree"), **kwargs, ) + ordered_df = df[list(source_columns_to_types)] - self.cursor.client.insert_df(temp_table.sql(dialect=self.dialect), df=df) + self.cursor.client.insert_df(temp_table.sql(dialect=self.dialect), df=ordered_df) return exp.select(*self._casted_columns(target_columns_to_types, source_columns)).from_( temp_table diff --git a/tests/core/engine_adapter/integration/conftest.py b/tests/core/engine_adapter/integration/conftest.py index eafdf2fe1d..30f934da63 100644 --- a/tests/core/engine_adapter/integration/conftest.py +++ b/tests/core/engine_adapter/integration/conftest.py @@ -148,7 +148,7 @@ def ctx_df( yield from create_test_context(*request.param) -@pytest.fixture(params=list(generate_pytest_params(ENGINES, query=True, df=False))) +@pytest.fixture(params=list(generate_pytest_params(ENGINES, query=True, df=True))) 
def ctx_query_and_df( request: FixtureRequest, create_test_context: t.Callable[[IntegrationTestEngine, str], t.Iterable[TestContext]], From 34dc9fde5214b20f22a0dd6dbeab1a693293aeee Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Tue, 23 Sep 2025 15:23:13 +1200 Subject: [PATCH 008/173] Feat!: bring dbt node information through to SQLMesh (#5412) --- .circleci/continue_config.yml | 7 +- .circleci/test_migration.sh | 41 ++++--- examples/sushi_dbt/config.py | 2 + sqlmesh/cli/project_init.py | 2 +- sqlmesh/core/audit/definition.py | 10 +- sqlmesh/core/context.py | 4 +- sqlmesh/core/model/definition.py | 4 + sqlmesh/core/node.py | 103 ++++++++++++++++- sqlmesh/core/scheduler.py | 4 +- sqlmesh/dbt/basemodel.py | 17 +-- sqlmesh/dbt/model.py | 2 +- sqlmesh/dbt/seed.py | 2 +- sqlmesh/dbt/test.py | 11 ++ .../v0098_add_dbt_node_info_in_node.py | 105 ++++++++++++++++++ sqlmesh_dbt/console.py | 20 +++- sqlmesh_dbt/operations.py | 10 +- tests/core/test_audit.py | 24 +++- tests/core/test_model.py | 29 ++++- tests/dbt/cli/conftest.py | 75 ------------- tests/dbt/cli/test_list.py | 51 +++++---- tests/dbt/cli/test_operations.py | 8 +- tests/dbt/cli/test_run.py | 8 +- tests/dbt/conftest.py | 87 +++++++++++++++ tests/dbt/test_config.py | 8 ++ tests/dbt/test_integration.py | 64 +++++++++++ tests/dbt/test_model.py | 102 +++++------------ .../dbt}/empty_project/dbt_project.yml | 2 +- .../dbt}/empty_project/profiles.yml | 2 +- .../dbt}/jaffle_shop_duckdb/dbt_project.yml | 0 .../jaffle_shop_duckdb/models/customers.sql | 0 .../dbt}/jaffle_shop_duckdb/models/docs.md | 0 .../dbt}/jaffle_shop_duckdb/models/orders.sql | 0 .../jaffle_shop_duckdb/models/overview.md | 0 .../dbt}/jaffle_shop_duckdb/models/schema.yml | 0 .../models/staging/schema.yml | 0 .../models/staging/stg_customers.sql | 0 .../models/staging/stg_orders.sql | 0 .../models/staging/stg_payments.sql | 0 .../dbt}/jaffle_shop_duckdb/profiles.yml | 0 .../dbt}/jaffle_shop_duckdb/seeds/.gitkeep | 0 .../seeds/raw_customers.csv | 0 .../jaffle_shop_duckdb/seeds/raw_orders.csv | 0 .../jaffle_shop_duckdb/seeds/raw_payments.csv | 0 43 files changed, 575 insertions(+), 229 deletions(-) create mode 100644 sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py rename tests/{dbt/cli/fixtures => fixtures/dbt}/empty_project/dbt_project.yml (94%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/empty_project/profiles.yml (85%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/dbt_project.yml (100%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/models/customers.sql (100%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/models/docs.md (100%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/models/orders.sql (100%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/models/overview.md (100%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/models/schema.yml (100%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/models/staging/schema.yml (100%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/models/staging/stg_customers.sql (100%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/models/staging/stg_orders.sql (100%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/models/staging/stg_payments.sql (100%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/profiles.yml (100%) rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/seeds/.gitkeep (100%) rename 
tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/seeds/raw_customers.csv (100%)
 rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/seeds/raw_orders.csv (100%)
 rename tests/{dbt/cli/fixtures => fixtures/dbt}/jaffle_shop_duckdb/seeds/raw_payments.csv (100%)

diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml
index e21f3d869b..c549c0ae78 100644
--- a/.circleci/continue_config.yml
+++ b/.circleci/continue_config.yml
@@ -144,8 +144,11 @@ jobs:
       - halt_unless_core
       - checkout
       - run:
-          name: Run the migration test
-          command: ./.circleci/test_migration.sh
+          name: Run the migration test - sushi
+          command: ./.circleci/test_migration.sh sushi "--gateway duckdb_persistent"
+      - run:
+          name: Run the migration test - sushi_dbt
+          command: ./.circleci/test_migration.sh sushi_dbt "--config migration_test_config"

   ui_style:
     docker:
diff --git a/.circleci/test_migration.sh b/.circleci/test_migration.sh
index a85d933bd3..9b8fe89e6e 100755
--- a/.circleci/test_migration.sh
+++ b/.circleci/test_migration.sh
@@ -1,11 +1,6 @@
 #!/usr/bin/env bash
 set -ex

-GATEWAY_NAME="duckdb_persistent"
-TMP_DIR=$(mktemp -d)
-SUSHI_DIR="$TMP_DIR/sushi"
-
-
 if [[ -z $(git tag --points-at HEAD) ]]; then
   # If the current commit is not tagged, we need to find the last tag
   LAST_TAG=$(git describe --tags --abbrev=0)
@@ -14,28 +9,48 @@ else
   LAST_TAG=$(git tag --sort=-creatordate | head -n 2 | tail -n 1)
 fi

+if [ "$1" == "" ]; then
+  echo "Usage: $0 <example_name> <sqlmesh_opts>"
+  echo "eg $0 sushi '--gateway duckdb_persistent'"
+  exit 1
+fi
+
+
+TMP_DIR=$(mktemp -d)
+EXAMPLE_NAME="$1"
+SQLMESH_OPTS="$2"
+EXAMPLE_DIR="./examples/$EXAMPLE_NAME"
+TEST_DIR="$TMP_DIR/$EXAMPLE_NAME"
+
+echo "Running migration test for '$EXAMPLE_NAME' in '$TEST_DIR' for example project '$EXAMPLE_DIR' using options '$SQLMESH_OPTS'"
+
 git checkout $LAST_TAG

 # Install dependencies from the previous release.
 make install-dev

-cp -r ./examples/sushi $TMP_DIR
+cp -r $EXAMPLE_DIR $TEST_DIR
+
+# this is only needed temporarily until the released tag for $LAST_TAG includes this config
+if [ "$EXAMPLE_NAME" == "sushi_dbt" ]; then
+  echo 'migration_test_config = sqlmesh_config(Path(__file__).parent, dbt_target_name="duckdb")' >> $TEST_DIR/config.py
+fi

 # Run initial plan
-pushd $SUSHI_DIR
+pushd $TEST_DIR
 rm -rf ./data/*
-sqlmesh --gateway $GATEWAY_NAME plan --no-prompts --auto-apply
+sqlmesh $SQLMESH_OPTS plan --no-prompts --auto-apply
 rm -rf .cache
 popd

-# Switch back to the starting state of the repository 
+# Switch back to the starting state of the repository
 git checkout -

 # Install updated dependencies. 
make install-dev # Migrate and make sure the diff is empty -pushd $SUSHI_DIR -sqlmesh --gateway $GATEWAY_NAME migrate -sqlmesh --gateway $GATEWAY_NAME diff prod -popd +pushd $TEST_DIR +sqlmesh $SQLMESH_OPTS migrate +sqlmesh $SQLMESH_OPTS diff prod +popd \ No newline at end of file diff --git a/examples/sushi_dbt/config.py b/examples/sushi_dbt/config.py index e7e28c98e4..2305cf79f2 100644 --- a/examples/sushi_dbt/config.py +++ b/examples/sushi_dbt/config.py @@ -5,3 +5,5 @@ config = sqlmesh_config(Path(__file__).parent) test_config = config + +migration_test_config = sqlmesh_config(Path(__file__).parent, dbt_target_name="duckdb") diff --git a/sqlmesh/cli/project_init.py b/sqlmesh/cli/project_init.py index 0790562de7..6b4f6c7a83 100644 --- a/sqlmesh/cli/project_init.py +++ b/sqlmesh/cli/project_init.py @@ -298,6 +298,7 @@ def init_example_project( dlt_path: t.Optional[str] = None, schema_name: str = "sqlmesh_example", cli_mode: InitCliMode = InitCliMode.DEFAULT, + start: t.Optional[str] = None, ) -> Path: root_path = Path(path) @@ -336,7 +337,6 @@ def init_example_project( models: t.Set[t.Tuple[str, str]] = set() settings = None - start = None if engine_type and template == ProjectTemplate.DLT: project_dialect = dialect or DIALECT_TO_TYPE.get(engine_type) if pipeline and project_dialect: diff --git a/sqlmesh/core/audit/definition.py b/sqlmesh/core/audit/definition.py index 561ee539f6..9f470872fe 100644 --- a/sqlmesh/core/audit/definition.py +++ b/sqlmesh/core/audit/definition.py @@ -19,7 +19,7 @@ sorted_python_env_payloads, ) from sqlmesh.core.model.common import make_python_env, single_value_or_tuple, ParsableSql -from sqlmesh.core.node import _Node +from sqlmesh.core.node import _Node, DbtInfoMixin, DbtNodeInfo from sqlmesh.core.renderer import QueryRenderer from sqlmesh.utils.date import TimeLike from sqlmesh.utils.errors import AuditConfigError, SQLMeshError, raise_config_error @@ -120,7 +120,7 @@ def audit_map_validator(cls: t.Type, v: t.Any, values: t.Any) -> t.Dict[str, t.A return {} -class ModelAudit(PydanticModel, AuditMixin, frozen=True): +class ModelAudit(PydanticModel, AuditMixin, DbtInfoMixin, frozen=True): """ Audit is an assertion made about your tables. 
@@ -137,6 +137,7 @@ class ModelAudit(PydanticModel, AuditMixin, frozen=True): expressions_: t.Optional[t.List[ParsableSql]] = Field(default=None, alias="expressions") jinja_macros: JinjaMacroRegistry = JinjaMacroRegistry() formatting: t.Optional[bool] = Field(default=None, exclude=True) + dbt_node_info_: t.Optional[DbtNodeInfo] = Field(alias="dbt_node_info", default=None) _path: t.Optional[Path] = None @@ -150,6 +151,10 @@ def __str__(self) -> str: path = f": {self._path.name}" if self._path else "" return f"{self.__class__.__name__}<{self.name}{path}>" + @property + def dbt_node_info(self) -> t.Optional[DbtNodeInfo]: + return self.dbt_node_info_ + class StandaloneAudit(_Node, AuditMixin): """ @@ -552,4 +557,5 @@ def _maybe_parse_arg_pair(e: exp.Expression) -> t.Tuple[str, exp.Expression]: "depends_on_": lambda value: exp.Tuple(expressions=sorted(value)), "tags": single_value_or_tuple, "default_catalog": exp.to_identifier, + "dbt_node_info_": lambda value: value.to_expression(), } diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index d7a2984f3a..437fbd6edd 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -1697,9 +1697,9 @@ def plan_builder( console=self.console, user_provided_flags=user_provided_flags, selected_models={ - dbt_name + dbt_unique_id for model in model_selector.expand_model_selections(select_models or "*") - if (dbt_name := snapshots[model].node.dbt_name) + if (dbt_unique_id := snapshots[model].node.dbt_unique_id) }, explain=explain or False, ignore_cron=ignore_cron or False, diff --git a/sqlmesh/core/model/definition.py b/sqlmesh/core/model/definition.py index c9eaa43b3e..974901cb55 100644 --- a/sqlmesh/core/model/definition.py +++ b/sqlmesh/core/model/definition.py @@ -1197,6 +1197,9 @@ def metadata_hash(self) -> str: for k, v in sorted(args.items()): metadata.append(f"{k}:{gen(v)}") + if self.dbt_node_info: + metadata.append(self.dbt_node_info.json(sort_keys=True)) + metadata.extend(self._additional_metadata) self._metadata_hash = hash_data(metadata) @@ -3019,6 +3022,7 @@ def render_expression( "formatting": str, "optimize_query": str, "virtual_environment_mode": lambda value: exp.Literal.string(value.value), + "dbt_node_info_": lambda value: value.to_expression(), } diff --git a/sqlmesh/core/node.py b/sqlmesh/core/node.py index b04a59a39f..4a3bf2564b 100644 --- a/sqlmesh/core/node.py +++ b/sqlmesh/core/node.py @@ -153,6 +153,101 @@ def milliseconds(self) -> int: return self.seconds * 1000 +class DbtNodeInfo(PydanticModel): + """ + Represents dbt-specific model information set by the dbt loader and intended to be made available at the Snapshot level + (as opposed to hidden within the individual model jinja macro registries). + + This allows for things like injecting implementations of variables / functions into the Jinja context that are compatible with + their dbt equivalents but are backed by the sqlmesh snapshots in any given plan / environment + """ + + unique_id: str + """This is the node/resource name/unique_id that's used as the node key in the dbt manifest. + It's prefixed by the resource type and is exposed in context variables like {{ selected_resources }}. + + Examples: + - test.jaffle_shop.unique_stg_orders_order_id.e3b841c71a + - seed.jaffle_shop.raw_payments + - model.jaffle_shop.stg_orders + """ + + name: str + """Name of this object in the dbt global namespace, used by things like {{ ref() }} calls. 
+
+    Examples:
+    - unique_stg_orders_order_id
+    - raw_payments
+    - stg_orders
+    """
+
+    fqn: str
+    """Used for selectors in --select/--exclude.
+    Takes the filesystem into account so may be structured differently to :unique_id.
+
+    Examples:
+    - jaffle_shop.staging.unique_stg_orders_order_id
+    - jaffle_shop.raw_payments
+    - jaffle_shop.staging.stg_orders
+    """
+
+    alias: t.Optional[str] = None
+    """This is dbt's way of overriding the _physical table_ a model is written to.
+
+    It's used in the following situation:
+    - Say you have two models, "stg_customers" and "customers"
+    - You want "stg_customers" to be written to the "staging" schema as eg "staging.customers" - NOT "staging.stg_customers"
+    - But you can't rename the file to "customers" because it will conflict with your other model file "customers"
+    - Even if you put it in a different folder, eg "staging/customers.sql" - dbt still has a global namespace so it will conflict
+      when you try to do something like "{{ ref('customers') }}"
+    - So dbt's solution to this problem is to keep calling it "stg_customers" at the dbt project/model level,
+      but allow overriding the physical table to "customers" via something like "{{ config(alias='customers', schema='staging') }}"
+
+    Note that if :alias is set, it does *not* replace :name at the model level and cannot be used interchangeably with :name.
+    It also does not affect the :fqn or :unique_id. It's just used to override :name when it comes time to generate the physical table name.
+    """
+
+    @model_validator(mode="after")
+    def post_init(self) -> Self:
+        # by default, dbt sets alias to the same as :name
+        # however, we only want to include :alias if it is actually different / actually providing an override
+        if self.alias == self.name:
+            self.alias = None
+        return self
+
+    def to_expression(self) -> exp.Expression:
+        """Produce a SQLGlot expression representing this object, for use in things like the model/audit definition renderers"""
+        return exp.tuple_(
+            *(
+                exp.PropertyEQ(this=exp.var(k), expression=exp.Literal.string(v))
+                for k, v in sorted(self.model_dump(exclude_none=True).items())
+            )
+        )
+
+
+class DbtInfoMixin:
+    """This mixin encapsulates properties that only exist for dbt compatibility and are otherwise not required
+    for native projects"""
+
+    @property
+    def dbt_node_info(self) -> t.Optional[DbtNodeInfo]:
+        raise NotImplementedError()
+
+    @property
+    def dbt_unique_id(self) -> t.Optional[str]:
+        """Used for compatibility with jinja context variables such as {{ selected_resources }}"""
+        if self.dbt_node_info:
+            return self.dbt_node_info.unique_id
+        return None
+
+    @property
+    def dbt_fqn(self) -> t.Optional[str]:
+        """Used in the selector engine for compatibility with selectors that select models by dbt fqn"""
+        if self.dbt_node_info:
+            return self.dbt_node_info.fqn
+        return None
+
+
 # this must be sorted in descending order
 INTERVAL_SECONDS = {
     IntervalUnit.YEAR: 60 * 60 * 24 * 365,
@@ -165,7 +260,7 @@ def milliseconds(self) -> int:
 }


-class _Node(PydanticModel):
+class _Node(DbtInfoMixin, PydanticModel):
     """
     Node is the core abstraction for entity that can be executed within the scheduler. 
@@ -199,7 +294,7 @@ class _Node(PydanticModel): interval_unit_: t.Optional[IntervalUnit] = Field(alias="interval_unit", default=None) tags: t.List[str] = [] stamp: t.Optional[str] = None - dbt_name: t.Optional[str] = None # dbt node name + dbt_node_info_: t.Optional[DbtNodeInfo] = Field(alias="dbt_node_info", default=None) _path: t.Optional[Path] = None _data_hash: t.Optional[str] = None _metadata_hash: t.Optional[str] = None @@ -446,6 +541,10 @@ def is_audit(self) -> bool: """Return True if this is an audit node""" return False + @property + def dbt_node_info(self) -> t.Optional[DbtNodeInfo]: + return self.dbt_node_info_ + class NodeType(str, Enum): MODEL = "model" diff --git a/sqlmesh/core/scheduler.py b/sqlmesh/core/scheduler.py index fd2e1cf004..af4d72b165 100644 --- a/sqlmesh/core/scheduler.py +++ b/sqlmesh/core/scheduler.py @@ -839,7 +839,9 @@ def _run_or_audit( run_environment_statements=run_environment_statements, audit_only=audit_only, auto_restatement_triggers=auto_restatement_triggers, - selected_models={s.node.dbt_name for s in merged_intervals if s.node.dbt_name}, + selected_models={ + s.node.dbt_unique_id for s in merged_intervals if s.node.dbt_unique_id + }, ) return CompletionStatus.FAILURE if errors else CompletionStatus.SUCCESS diff --git a/sqlmesh/dbt/basemodel.py b/sqlmesh/dbt/basemodel.py index 3534b95bc3..4637bbf91c 100644 --- a/sqlmesh/dbt/basemodel.py +++ b/sqlmesh/dbt/basemodel.py @@ -13,6 +13,7 @@ from sqlmesh.core.config.base import UpdateStrategy from sqlmesh.core.config.common import VirtualEnvironmentMode from sqlmesh.core.model import Model +from sqlmesh.core.node import DbtNodeInfo from sqlmesh.dbt.column import ( ColumnConfig, column_descriptions_to_sqlmesh, @@ -120,8 +121,10 @@ class BaseModelConfig(GeneralConfig): grain: t.Union[str, t.List[str]] = [] # DBT configuration fields + unique_id: str = "" name: str = "" package_name: str = "" + fqn: t.List[str] = [] schema_: str = Field("", alias="schema") database: t.Optional[str] = None alias: t.Optional[str] = None @@ -273,12 +276,10 @@ def sqlmesh_config_fields(self) -> t.Set[str]: return {"description", "owner", "stamp", "storage_format"} @property - def node_name(self) -> str: - resource_type = getattr(self, "resource_type", "model") - node_name = f"{resource_type}.{self.package_name}.{self.name}" - if self.version: - node_name += f".v{self.version}" - return node_name + def node_info(self) -> DbtNodeInfo: + return DbtNodeInfo( + unique_id=self.unique_id, name=self.name, fqn=".".join(self.fqn), alias=self.alias + ) def sqlmesh_model_kwargs( self, @@ -349,8 +350,8 @@ def to_sqlmesh( def _model_jinja_context( self, context: DbtContext, dependencies: Dependencies ) -> t.Dict[str, t.Any]: - if context._manifest and self.node_name in context._manifest._manifest.nodes: - attributes = context._manifest._manifest.nodes[self.node_name].to_dict() + if context._manifest and self.unique_id in context._manifest._manifest.nodes: + attributes = context._manifest._manifest.nodes[self.unique_id].to_dict() if dependencies.model_attrs.all_attrs: model_node: AttributeDict[str, t.Any] = AttributeDict(attributes) else: diff --git a/sqlmesh/dbt/model.py b/sqlmesh/dbt/model.py index efad5e790b..9386b0b4f8 100644 --- a/sqlmesh/dbt/model.py +++ b/sqlmesh/dbt/model.py @@ -694,7 +694,7 @@ def to_sqlmesh( extract_dependencies_from_query=False, allow_partials=allow_partials, virtual_environment_mode=virtual_environment_mode, - dbt_name=self.node_name, + dbt_node_info=self.node_info, **optional_kwargs, **model_kwargs, ) diff --git 
a/sqlmesh/dbt/seed.py b/sqlmesh/dbt/seed.py index d6ecc768f9..c0c8186f29 100644 --- a/sqlmesh/dbt/seed.py +++ b/sqlmesh/dbt/seed.py @@ -92,7 +92,7 @@ def to_sqlmesh( audit_definitions=audit_definitions, virtual_environment_mode=virtual_environment_mode, start=self.start or context.sqlmesh_config.model_defaults.start, - dbt_name=self.node_name, + dbt_node_info=self.node_info, **kwargs, ) diff --git a/sqlmesh/dbt/test.py b/sqlmesh/dbt/test.py index 5c18ff4d81..747c9d469c 100644 --- a/sqlmesh/dbt/test.py +++ b/sqlmesh/dbt/test.py @@ -8,6 +8,7 @@ from pydantic import Field import sqlmesh.core.dialect as d from sqlmesh.core.audit import Audit, ModelAudit, StandaloneAudit +from sqlmesh.core.node import DbtNodeInfo from sqlmesh.dbt.common import ( Dependencies, GeneralConfig, @@ -79,8 +80,10 @@ class TestConfig(GeneralConfig): dialect_: t.Optional[str] = Field(None, alias="dialect") # dbt fields + unique_id: str = "" package_name: str = "" alias: t.Optional[str] = None + fqn: t.List[str] = [] schema_: t.Optional[str] = Field("", alias="schema") database: t.Optional[str] = None severity: Severity = Severity.ERROR @@ -155,6 +158,7 @@ def to_sqlmesh(self, context: DbtContext) -> Audit: jinja_macros.add_globals({"this": self.relation_info}) audit = StandaloneAudit( name=self.name, + dbt_node_info=self.node_info, dialect=self.dialect(context), skip=skip, query=query, @@ -171,6 +175,7 @@ def to_sqlmesh(self, context: DbtContext) -> Audit: else: audit = ModelAudit( name=self.name, + dbt_node_info=self.node_info, dialect=self.dialect(context), skip=skip, blocking=blocking, @@ -214,6 +219,12 @@ def relation_info(self) -> AttributeDict: } ) + @property + def node_info(self) -> DbtNodeInfo: + return DbtNodeInfo( + unique_id=self.unique_id, name=self.name, fqn=".".join(self.fqn), alias=self.alias + ) + def _remove_jinja_braces(jinja_str: str) -> str: no_braces = jinja_str diff --git a/sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py b/sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py new file mode 100644 index 0000000000..c8acd0bafd --- /dev/null +++ b/sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py @@ -0,0 +1,105 @@ +"""Replace 'dbt_name' with 'dbt_node_info' in the snapshot definition""" + +import json +from sqlglot import exp +from sqlmesh.utils.migration import index_text_type, blob_text_type + + +def migrate_schemas(state_sync, **kwargs): # type: ignore + pass + + +def migrate_rows(state_sync, **kwargs): # type: ignore + import pandas as pd + + engine_adapter = state_sync.engine_adapter + schema = state_sync.schema + snapshots_table = "_snapshots" + if schema: + snapshots_table = f"{schema}.{snapshots_table}" + + index_type = index_text_type(engine_adapter.dialect) + blob_type = blob_text_type(engine_adapter.dialect) + + new_snapshots = [] + migration_needed = False + + for ( + name, + identifier, + version, + snapshot, + kind_name, + updated_ts, + unpaused_ts, + ttl_ms, + unrestorable, + forward_only, + dev_version, + fingerprint, + ) in engine_adapter.fetchall( + exp.select( + "name", + "identifier", + "version", + "snapshot", + "kind_name", + "updated_ts", + "unpaused_ts", + "ttl_ms", + "unrestorable", + "forward_only", + "dev_version", + "fingerprint", + ).from_(snapshots_table), + quote_identifiers=True, + ): + parsed_snapshot = json.loads(snapshot) + if dbt_name := parsed_snapshot["node"].get("dbt_name"): + parsed_snapshot["node"].pop("dbt_name") + parsed_snapshot["node"]["dbt_node_info"] = { + "unique_id": dbt_name, + # these will get populated as metadata-only changes on the next 
plan + "name": "", + "fqn": "", + } + migration_needed = True + + new_snapshots.append( + { + "name": name, + "identifier": identifier, + "version": version, + "snapshot": json.dumps(parsed_snapshot), + "kind_name": kind_name, + "updated_ts": updated_ts, + "unpaused_ts": unpaused_ts, + "ttl_ms": ttl_ms, + "unrestorable": unrestorable, + "forward_only": forward_only, + "dev_version": dev_version, + "fingerprint": fingerprint, + } + ) + + if migration_needed and new_snapshots: + engine_adapter.delete_from(snapshots_table, "TRUE") + + engine_adapter.insert_append( + snapshots_table, + pd.DataFrame(new_snapshots), + target_columns_to_types={ + "name": exp.DataType.build(index_type), + "identifier": exp.DataType.build(index_type), + "version": exp.DataType.build(index_type), + "snapshot": exp.DataType.build(blob_type), + "kind_name": exp.DataType.build(index_type), + "updated_ts": exp.DataType.build("bigint"), + "unpaused_ts": exp.DataType.build("bigint"), + "ttl_ms": exp.DataType.build("bigint"), + "unrestorable": exp.DataType.build("boolean"), + "forward_only": exp.DataType.build("boolean"), + "dev_version": exp.DataType.build(index_type), + "fingerprint": exp.DataType.build(blob_type), + }, + ) diff --git a/sqlmesh_dbt/console.py b/sqlmesh_dbt/console.py index 3c62adfe68..6bf7a1618f 100644 --- a/sqlmesh_dbt/console.py +++ b/sqlmesh_dbt/console.py @@ -1,6 +1,7 @@ import typing as t from sqlmesh.core.console import TerminalConsole from sqlmesh.core.model import Model +from sqlmesh.core.snapshot.definition import Node from rich.tree import Tree @@ -9,19 +10,26 @@ def print(self, msg: str) -> None: return self._print(msg) def list_models( - self, models: t.List[Model], list_parents: bool = True, list_audits: bool = True + self, + models: t.List[Model], + all_nodes: t.Dict[str, Node], + list_parents: bool = True, + list_audits: bool = True, ) -> None: model_list = Tree("[bold]Models in project:[/bold]") for model in models: - model_tree = model_list.add(model.name) + model_tree = model_list.add(model.dbt_fqn or model.name) if list_parents: - for parent in model.depends_on: - model_tree.add(f"depends_on: {parent}") + for parent_name in model.depends_on: + if parent := all_nodes.get(parent_name): + parent_name = parent.dbt_fqn or parent_name + + model_tree.add(f"depends_on: {parent_name}") if list_audits: - for audit_name in model.audit_definitions: - model_tree.add(f"audit: {audit_name}") + for audit_name, audit in model.audit_definitions.items(): + model_tree.add(f"audit: {audit.dbt_fqn or audit_name}") self._print(model_list) diff --git a/sqlmesh_dbt/operations.py b/sqlmesh_dbt/operations.py index f95d0d931e..e15a2cb93e 100644 --- a/sqlmesh_dbt/operations.py +++ b/sqlmesh_dbt/operations.py @@ -32,7 +32,9 @@ def list_( # - "data tests" (audits) for those models # it also applies selectors which is useful for testing selectors selected_models = list(self._selected_models(select, exclude).values()) - self.console.list_models(selected_models) + self.console.list_models( + selected_models, {k: v.node for k, v in self.context.snapshots.items()} + ) def run( self, @@ -260,7 +262,7 @@ def create( return DbtOperations(sqlmesh_context, dbt_project, debug=debug) -def init_project_if_required(project_dir: Path) -> None: +def init_project_if_required(project_dir: Path, start: t.Optional[str] = None) -> None: """ SQLMesh needs a start date to as the starting point for calculating intervals on incremental models, amongst other things @@ -276,4 +278,6 @@ def init_project_if_required(project_dir: Path) -> None: 
if not any(f.exists() for f in [project_dir / file for file in ALL_CONFIG_FILENAMES]): get_console().log_warning("No existing SQLMesh config detected; creating one") - init_example_project(path=project_dir, engine_type=None, template=ProjectTemplate.DBT) + init_example_project( + path=project_dir, engine_type=None, template=ProjectTemplate.DBT, start=start + ) diff --git a/tests/core/test_audit.py b/tests/core/test_audit.py index ed67975e9e..2ffcbbc4b2 100644 --- a/tests/core/test_audit.py +++ b/tests/core/test_audit.py @@ -5,6 +5,7 @@ from sqlmesh.core import constants as c from sqlmesh.core.config.model import ModelDefaultsConfig from sqlmesh.core.context import Context +from sqlmesh.core.node import DbtNodeInfo from sqlmesh.core.audit import ( ModelAudit, StandaloneAudit, @@ -12,7 +13,7 @@ load_audit, load_multiple_audits, ) -from sqlmesh.core.dialect import parse +from sqlmesh.core.dialect import parse, jinja_query from sqlmesh.core.model import ( FullKind, IncrementalByTimeRangeKind, @@ -730,6 +731,27 @@ def test_render_definition(): assert "def test_macro(evaluator, v):" in format_model_expressions(audit.render_definition()) +def test_render_definition_dbt_node_info(): + node_info = DbtNodeInfo( + unique_id="test.project.my_audit", name="my_audit", fqn="project.my_audit" + ) + + audit = StandaloneAudit(name="my_audit", dbt_node_info=node_info, query=jinja_query("select 1")) + + assert ( + audit.render_definition()[0].sql(pretty=True) + == """AUDIT ( + name my_audit, + dbt_node_info ( + fqn := 'project.my_audit', + name := 'my_audit', + unique_id := 'test.project.my_audit' + ), + standalone TRUE +)""" + ) + + def test_text_diff(): expressions = parse( """ diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 00ff48b0d2..726ac52b66 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -61,7 +61,7 @@ from sqlmesh.core.model.common import parse_expression from sqlmesh.core.model.kind import ModelKindName, _model_kind_validator from sqlmesh.core.model.seed import CsvSettings -from sqlmesh.core.node import IntervalUnit, _Node +from sqlmesh.core.node import IntervalUnit, _Node, DbtNodeInfo from sqlmesh.core.signal import signal from sqlmesh.core.snapshot import Snapshot, SnapshotChangeCategory from sqlmesh.utils.date import TimeLike, to_datetime, to_ds, to_timestamp @@ -2100,6 +2100,33 @@ def test_render_definition_with_virtual_update_statements(): ) +def test_render_definition_dbt_node_info(): + node_info = DbtNodeInfo(unique_id="model.db.table", name="table", fqn="db.table") + model = load_sql_based_model( + d.parse( + f""" + MODEL ( + name db.table, + kind FULL + ); + + select 1 as a; + """ + ), + dbt_node_info=node_info, + ) + + assert model.dbt_node_info + assert ( + model.render_definition()[0].sql(pretty=True) + == """MODEL ( + name db.table, + dbt_node_info (fqn := 'db.table', name := 'table', unique_id := 'model.db.table'), + kind FULL +)""" + ) + + def test_cron(): daily = _Node(name="x", cron="@daily") assert to_datetime(daily.cron_prev("2020-01-01")) == to_datetime("2019-12-31") diff --git a/tests/dbt/cli/conftest.py b/tests/dbt/cli/conftest.py index e555f9144a..26757bf3ab 100644 --- a/tests/dbt/cli/conftest.py +++ b/tests/dbt/cli/conftest.py @@ -1,82 +1,7 @@ import typing as t -from pathlib import Path -import os import functools from click.testing import CliRunner, Result -from sqlmesh_dbt.operations import init_project_if_required import pytest -import uuid - - -class EmptyProjectCreator(t.Protocol): - def __call__( - self, project_name: 
t.Optional[str] = None, target_name: t.Optional[str] = None - ) -> Path: ... - - -@pytest.fixture -def jaffle_shop_duckdb(copy_to_temp_path: t.Callable[..., t.List[Path]]) -> t.Iterable[Path]: - fixture_path = Path(__file__).parent / "fixtures" / "jaffle_shop_duckdb" - assert fixture_path.exists() - - current_path = os.getcwd() - output_path = copy_to_temp_path(paths=fixture_path)[0] - - # so that we can invoke commands from the perspective of a user that is already in the correct directory - os.chdir(output_path) - - yield output_path - - os.chdir(current_path) - - -@pytest.fixture -def create_empty_project( - copy_to_temp_path: t.Callable[..., t.List[Path]], -) -> t.Iterable[t.Callable[..., Path]]: - default_project_name = f"test_{str(uuid.uuid4())[:8]}" - default_target_name = "duckdb" - fixture_path = Path(__file__).parent / "fixtures" / "empty_project" - assert fixture_path.exists() - - current_path = os.getcwd() - - def _create_empty_project( - project_name: t.Optional[str] = None, target_name: t.Optional[str] = None - ) -> Path: - project_name = project_name or default_project_name - target_name = target_name or default_target_name - output_path = copy_to_temp_path(paths=fixture_path)[0] - - dbt_project_yml = output_path / "dbt_project.yml" - profiles_yml = output_path / "profiles.yml" - - assert dbt_project_yml.exists() - assert profiles_yml.exists() - - (output_path / "models").mkdir() - (output_path / "seeds").mkdir() - - dbt_project_yml.write_text( - dbt_project_yml.read_text().replace("empty_project", project_name) - ) - profiles_yml.write_text( - profiles_yml.read_text() - .replace("empty_project", project_name) - .replace("__DEFAULT_TARGET__", target_name) - ) - - init_project_if_required(output_path) - - # so that we can invoke commands from the perspective of a user that is already in the correct directory - os.chdir(output_path) - - return output_path - - yield _create_empty_project - - # cleanup - switch cwd back to original - os.chdir(current_path) @pytest.fixture diff --git a/tests/dbt/cli/test_list.py b/tests/dbt/cli/test_list.py index 1bc22ce87e..4d294decc1 100644 --- a/tests/dbt/cli/test_list.py +++ b/tests/dbt/cli/test_list.py @@ -12,10 +12,10 @@ def test_list(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): assert result.exit_code == 0 assert not result.exception - assert "main.orders" in result.output - assert "main.customers" in result.output - assert "main.stg_payments" in result.output - assert "main.raw_orders" in result.output + assert "─ jaffle_shop.orders" in result.output + assert "─ jaffle_shop.customers" in result.output + assert "─ jaffle_shop.staging.stg_payments" in result.output + assert "─ jaffle_shop.raw_orders" in result.output def test_list_select(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): @@ -24,12 +24,12 @@ def test_list_select(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Resul assert result.exit_code == 0 assert not result.exception - assert "main.customers" in result.output - assert "main.stg_customers" in result.output - assert "main.raw_customers" in result.output + assert "─ jaffle_shop.customers" in result.output + assert "─ jaffle_shop.staging.stg_customers" in result.output + assert "─ jaffle_shop.raw_customers" in result.output - assert "main.stg_payments" not in result.output - assert "main.raw_orders" not in result.output + assert "─ jaffle_shop.staging.stg_payments" not in result.output + assert "─ jaffle_shop.raw_orders" not in result.output def 
test_list_select_exclude(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): @@ -39,13 +39,13 @@ def test_list_select_exclude(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[.. assert result.exit_code == 0 assert not result.exception - assert "main.customers" in result.output - assert "main.stg_customers" in result.output - assert "main.raw_customers" in result.output + assert "─ jaffle_shop.customers" in result.output + assert "─ jaffle_shop.staging.stg_customers" in result.output + assert "─ jaffle_shop.raw_customers" in result.output - assert "main.orders" not in result.output - assert "main.stg_payments" not in result.output - assert "main.raw_orders" not in result.output + assert "─ jaffle_shop.orders" not in result.output + assert "─ jaffle_shop.staging.stg_payments" not in result.output + assert "─ jaffle_shop.raw_orders" not in result.output # multiple exclude for args in ( @@ -56,21 +56,26 @@ def test_list_select_exclude(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[.. assert result.exit_code == 0 assert not result.exception - assert "main.stg_orders" in result.output + assert "─ jaffle_shop.staging.stg_orders" in result.output - assert "main.customers" not in result.output - assert "main.orders" not in result.output + assert "─ jaffle_shop.customers" not in result.output + assert "─ jaffle_shop.orders" not in result.output def test_list_with_vars(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): - (jaffle_shop_duckdb / "models" / "aliased_model.sql").write_text(""" - {{ config(alias='model_' + var('foo')) }} - select 1 + ( + jaffle_shop_duckdb / "models" / "vars_model.sql" + ).write_text(""" + select * from {{ ref('custom' + var('foo')) }} """) - result = invoke_cli(["list", "--vars", "foo: bar"]) + result = invoke_cli(["list", "--vars", "foo: ers"]) assert result.exit_code == 0 assert not result.exception - assert "model_bar" in result.output + assert ( + """├── jaffle_shop.vars_model +│ └── depends_on: jaffle_shop.customers""" + in result.output + ) diff --git a/tests/dbt/cli/test_operations.py b/tests/dbt/cli/test_operations.py index e9c4dc0063..769887efe4 100644 --- a/tests/dbt/cli/test_operations.py +++ b/tests/dbt/cli/test_operations.py @@ -8,7 +8,7 @@ import time_machine from sqlmesh.core.plan import PlanBuilder from sqlmesh.core.config.common import VirtualEnvironmentMode -from tests.dbt.cli.conftest import EmptyProjectCreator +from tests.dbt.conftest import EmptyProjectCreator pytestmark = pytest.mark.slow @@ -273,7 +273,7 @@ def test_run_option_full_refresh( create_empty_project: EmptyProjectCreator, env_name: str, vde_mode: VirtualEnvironmentMode ): # create config file prior to load - project_path = create_empty_project(project_name="test") + project_path, models_path = create_empty_project(project_name="test") config_path = project_path / "sqlmesh.yaml" config = yaml.load(config_path) @@ -282,8 +282,8 @@ def test_run_option_full_refresh( with config_path.open("w") as f: yaml.dump(config, f) - (project_path / "models" / "model_a.sql").write_text("select 1") - (project_path / "models" / "model_b.sql").write_text("select 2") + (models_path / "model_a.sql").write_text("select 1") + (models_path / "model_b.sql").write_text("select 2") operations = create(project_dir=project_path) diff --git a/tests/dbt/cli/test_run.py b/tests/dbt/cli/test_run.py index 9af1de8561..788a7b04a8 100644 --- a/tests/dbt/cli/test_run.py +++ b/tests/dbt/cli/test_run.py @@ -5,7 +5,7 @@ import time_machine from sqlmesh_dbt.operations import create from 
tests.cli.test_cli import FREEZE_TIME -from tests.dbt.cli.conftest import EmptyProjectCreator +from tests.dbt.conftest import EmptyProjectCreator pytestmark = pytest.mark.slow @@ -45,13 +45,13 @@ def test_run_with_selectors(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[... def test_run_with_changes_and_full_refresh( create_empty_project: EmptyProjectCreator, invoke_cli: t.Callable[..., Result] ): - project_path = create_empty_project(project_name="test") + project_path, models_path = create_empty_project(project_name="test") engine_adapter = create(project_path).context.engine_adapter engine_adapter.execute("create table external_table as select 'foo' as a, 'bar' as b") - (project_path / "models" / "model_a.sql").write_text("select a, b from external_table") - (project_path / "models" / "model_b.sql").write_text("select a, b from {{ ref('model_a') }}") + (models_path / "model_a.sql").write_text("select a, b from external_table") + (models_path / "model_b.sql").write_text("select a, b from {{ ref('model_a') }}") # populate initial env result = invoke_cli(["run"]) diff --git a/tests/dbt/conftest.py b/tests/dbt/conftest.py index 5875d9f575..56d77e7496 100644 --- a/tests/dbt/conftest.py +++ b/tests/dbt/conftest.py @@ -1,6 +1,8 @@ from __future__ import annotations import typing as t +import os +from pathlib import Path import pytest @@ -8,6 +10,17 @@ from sqlmesh.dbt.context import DbtContext from sqlmesh.dbt.project import Project from sqlmesh.dbt.target import PostgresConfig +from sqlmesh_dbt.operations import init_project_if_required +import uuid + + +class EmptyProjectCreator(t.Protocol): + def __call__( + self, + project_name: t.Optional[str] = None, + target_name: t.Optional[str] = None, + start: t.Optional[str] = None, + ) -> t.Tuple[Path, Path]: ... 
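+# Illustrative usage, mirroring the call sites updated elsewhere in this patch: tests request the +# create_empty_project fixture (typed as EmptyProjectCreator) and unpack the returned tuple, e.g. +# project_path, models_path = create_empty_project(project_name="test", start="2025-01-01")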
@pytest.fixture() @@ -15,6 +28,80 @@ def sushi_test_project(sushi_test_dbt_context: Context) -> Project: return sushi_test_dbt_context._loaders[0]._load_projects()[0]  # type: ignore +@pytest.fixture +def create_empty_project( + copy_to_temp_path: t.Callable[..., t.List[Path]], +) -> t.Iterable[EmptyProjectCreator]: + default_project_name = f"test_{str(uuid.uuid4())[:8]}" + default_target_name = "duckdb" + fixture_path = Path(__file__).parent.parent / "fixtures" / "dbt" / "empty_project" + assert fixture_path.exists() + + current_path = os.getcwd() + + def _create_empty_project( + project_name: t.Optional[str] = None, + target_name: t.Optional[str] = None, + start: t.Optional[str] = None, + ) -> t.Tuple[Path, Path]: + project_name = project_name or default_project_name + target_name = target_name or default_target_name + output_path = copy_to_temp_path(paths=fixture_path)[0] + + dbt_project_yml = output_path / "dbt_project.yml" + profiles_yml = output_path / "profiles.yml" + + assert dbt_project_yml.exists() + assert profiles_yml.exists() + + models_path = output_path / "models" + (models_path).mkdir() + (output_path / "seeds").mkdir() + + dbt_project_yml.write_text( + dbt_project_yml.read_text().replace("empty_project", project_name) + ) + profiles_yml.write_text( + profiles_yml.read_text() + .replace("empty_project", project_name) + .replace("__DEFAULT_TARGET__", target_name) + ) + + init_project_if_required(output_path, start) + + # so that we can invoke commands from the perspective of a user that is already in the correct directory + os.chdir(output_path) + + return output_path, models_path + + yield _create_empty_project + + # cleanup - switch cwd back to original + os.chdir(current_path) + + +@pytest.fixture +def jaffle_shop_duckdb(copy_to_temp_path: t.Callable[..., t.List[Path]]) -> t.Iterable[Path]: + fixture_path = Path(__file__).parent.parent / "fixtures" / "dbt" / "jaffle_shop_duckdb" + assert fixture_path.exists() + + current_path = os.getcwd() + output_path = copy_to_temp_path(paths=fixture_path)[0] + + # so that we can invoke commands from the perspective of a user that is already in the correct directory + os.chdir(output_path) + + yield output_path + + os.chdir(current_path) + + +@pytest.fixture +def jaffle_shop_duckdb_context(jaffle_shop_duckdb: Path) -> Context: + init_project_if_required(jaffle_shop_duckdb) + return Context(paths=[jaffle_shop_duckdb]) + + @pytest.fixture() def runtime_renderer() -> t.Callable: def create_renderer(context: DbtContext, **kwargs: t.Any) -> t.Callable: diff --git a/tests/dbt/test_config.py b/tests/dbt/test_config.py index fe226d4926..0e96024aa1 100644 --- a/tests/dbt/test_config.py +++ b/tests/dbt/test_config.py @@ -91,8 +91,10 @@ def test_update(current: t.Dict[str, t.Any], new: t.Dict[str, t.Any], expected: def test_model_to_sqlmesh_fields(dbt_dummy_postgres_config: PostgresConfig): model_config = ModelConfig( + unique_id="model.package.name", name="name", package_name="package", + fqn=["package", "name"], alias="model", schema="custom", database="database", @@ -123,6 +125,8 @@ def test_model_to_sqlmesh_fields(dbt_dummy_postgres_config: PostgresConfig): assert isinstance(model, SqlModel) assert model.name == "database.custom.model" + assert model.dbt_unique_id == "model.package.name" + assert model.dbt_fqn == "package.name" assert model.description == "test model" assert ( model.render_query_or_raise().sql() @@ -185,7 +189,9 @@ def test_model_to_sqlmesh_fields(dbt_dummy_postgres_config: PostgresConfig): def test_test_to_sqlmesh_fields():
sql = "SELECT * FROM FOO WHERE cost > 100" test_config = TestConfig( + unique_id="test.test_package.foo_test", name="foo_test", + fqn=["test_package", "foo_test"], sql=sql, model_name="Foo", column_name="cost", @@ -199,6 +205,8 @@ def test_test_to_sqlmesh_fields(): audit = test_config.to_sqlmesh(context) assert audit.name == "foo_test" + assert audit.dbt_unique_id == "test.test_package.foo_test" + assert audit.dbt_fqn == "test_package.foo_test" assert audit.dialect == "duckdb" assert not audit.skip assert audit.blocking diff --git a/tests/dbt/test_integration.py b/tests/dbt/test_integration.py index 5a944d55d4..e1f051dbcf 100644 --- a/tests/dbt/test_integration.py +++ b/tests/dbt/test_integration.py @@ -540,3 +540,67 @@ def test_scd_type_2_by_column( ) df_expected = create_df(expected_table_data, self.target_schema) compare_dataframes(df_actual, df_expected, msg=f"Failed on time {time}") + + +def test_dbt_node_info(jaffle_shop_duckdb_context: Context): + ctx = jaffle_shop_duckdb_context + + customers = ctx.models['"jaffle_shop"."main"."customers"'] + assert customers.dbt_unique_id == "model.jaffle_shop.customers" + assert customers.dbt_fqn == "jaffle_shop.customers" + assert customers.dbt_node_info + assert customers.dbt_node_info.name == "customers" + + orders = ctx.models['"jaffle_shop"."main"."orders"'] + assert orders.dbt_unique_id == "model.jaffle_shop.orders" + assert orders.dbt_fqn == "jaffle_shop.orders" + assert orders.dbt_node_info + assert orders.dbt_node_info.name == "orders" + + stg_customers = ctx.models['"jaffle_shop"."main"."stg_customers"'] + assert stg_customers.dbt_unique_id == "model.jaffle_shop.stg_customers" + assert stg_customers.dbt_fqn == "jaffle_shop.staging.stg_customers" + assert stg_customers.dbt_node_info + assert stg_customers.dbt_node_info.name == "stg_customers" + + stg_orders = ctx.models['"jaffle_shop"."main"."stg_orders"'] + assert stg_orders.dbt_unique_id == "model.jaffle_shop.stg_orders" + assert stg_orders.dbt_fqn == "jaffle_shop.staging.stg_orders" + assert stg_orders.dbt_node_info + assert stg_orders.dbt_node_info.name == "stg_orders" + + raw_customers = ctx.models['"jaffle_shop"."main"."raw_customers"'] + assert raw_customers.dbt_unique_id == "seed.jaffle_shop.raw_customers" + assert raw_customers.dbt_fqn == "jaffle_shop.raw_customers" + assert raw_customers.dbt_node_info + assert raw_customers.dbt_node_info.name == "raw_customers" + + raw_orders = ctx.models['"jaffle_shop"."main"."raw_orders"'] + assert raw_orders.dbt_unique_id == "seed.jaffle_shop.raw_orders" + assert raw_orders.dbt_fqn == "jaffle_shop.raw_orders" + assert raw_orders.dbt_node_info + assert raw_orders.dbt_node_info.name == "raw_orders" + + raw_payments = ctx.models['"jaffle_shop"."main"."raw_payments"'] + assert raw_payments.dbt_unique_id == "seed.jaffle_shop.raw_payments" + assert raw_payments.dbt_fqn == "jaffle_shop.raw_payments" + assert raw_payments.dbt_node_info + assert raw_payments.dbt_node_info.name == "raw_payments" + + relationship_audit = ctx.snapshots[ + "relationships_orders_customer_id__customer_id__ref_customers_" + ] + assert relationship_audit.node.is_audit + assert ( + relationship_audit.node.dbt_unique_id + == "test.jaffle_shop.relationships_orders_customer_id__customer_id__ref_customers_.c6ec7f58f2" + ) + assert ( + relationship_audit.node.dbt_fqn + == "jaffle_shop.relationships_orders_customer_id__customer_id__ref_customers_" + ) + assert relationship_audit.node.dbt_node_info + assert ( + relationship_audit.node.dbt_node_info.name + == 
"relationships_orders_customer_id__customer_id__ref_customers_" + ) diff --git a/tests/dbt/test_model.py b/tests/dbt/test_model.py index 7bcfe98768..a64b29e89d 100644 --- a/tests/dbt/test_model.py +++ b/tests/dbt/test_model.py @@ -1,5 +1,4 @@ import datetime -import typing as t import pytest from pathlib import Path @@ -16,53 +15,11 @@ from sqlmesh.dbt.target import PostgresConfig from sqlmesh.dbt.test import TestConfig from sqlmesh.utils.yaml import YAML +from sqlmesh.utils.date import to_ds pytestmark = pytest.mark.dbt -@pytest.fixture -def create_empty_project(tmp_path: Path) -> t.Callable[[], t.Tuple[Path, Path]]: - def _create_empty_project() -> t.Tuple[Path, Path]: - yaml = YAML() - dbt_project_dir = tmp_path / "dbt" - dbt_project_dir.mkdir() - dbt_model_dir = dbt_project_dir / "models" - dbt_model_dir.mkdir() - dbt_project_config = { - "name": "empty_project", - "version": "1.0.0", - "config-version": 2, - "profile": "test", - "model-paths": ["models"], - } - dbt_project_file = dbt_project_dir / "dbt_project.yml" - with open(dbt_project_file, "w", encoding="utf-8") as f: - YAML().dump(dbt_project_config, f) - sqlmesh_config = { - "model_defaults": { - "start": "2025-01-01", - } - } - sqlmesh_config_file = dbt_project_dir / "sqlmesh.yaml" - with open(sqlmesh_config_file, "w", encoding="utf-8") as f: - YAML().dump(sqlmesh_config, f) - dbt_data_dir = tmp_path / "dbt_data" - dbt_data_dir.mkdir() - dbt_data_file = dbt_data_dir / "local.db" - dbt_profile_config = { - "test": { - "outputs": {"duckdb": {"type": "duckdb", "path": str(dbt_data_file)}}, - "target": "duckdb", - } - } - db_profile_file = dbt_project_dir / "profiles.yml" - with open(db_profile_file, "w", encoding="utf-8") as f: - yaml.dump(dbt_profile_config, f) - return dbt_project_dir, dbt_model_dir - - return _create_empty_project - - def test_test_config_is_standalone_behavior() -> None: """Test that TestConfig.is_standalone correctly identifies tests with cross-model references""" @@ -174,7 +131,7 @@ def test_manifest_filters_standalone_tests_from_models( ) -> None: """Integration test that verifies models only contain non-standalone tests after manifest loading.""" yaml = YAML() - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") # Create two models model1_contents = "SELECT 1 as id" @@ -265,7 +222,7 @@ def test_load_invalid_ref_audit_constraints( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: yaml = YAML() - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it full_model_contents = """{{ config(tags=["blah"], tests=[{"blah": {"to": "ref('completely_ignored')", "field": "blah2"} }]) }} SELECT 1 as cola""" full_model_file = model_dir / "full_model.sql" @@ -332,7 +289,7 @@ def test_load_invalid_ref_audit_constraints( def test_load_microbatch_all_defined( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it microbatch_contents = """ {{ @@ -373,7 +330,7 @@ def test_load_microbatch_all_defined( def test_load_microbatch_all_defined_diff_values( tmp_path: Path, caplog, 
dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it microbatch_contents = """ {{ @@ -415,7 +372,7 @@ def test_load_microbatch_all_defined_diff_values( def test_load_microbatch_required_only( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it microbatch_contents = """ {{ @@ -454,7 +411,7 @@ def test_load_microbatch_required_only( def test_load_incremental_time_range_strategy_required_only( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local", start="2025-01-01") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it incremental_time_range_contents = """ {{ @@ -476,7 +433,7 @@ def test_load_incremental_time_range_strategy_required_only( snapshot = context.snapshots[snapshot_fqn] model = snapshot.model # Validate model-level attributes - assert model.start == "2025-01-01" + assert to_ds(model.start or "") == "2025-01-01" assert model.interval_unit.is_day # Validate model kind attributes assert isinstance(model.kind, IncrementalByTimeRangeKind) @@ -496,7 +453,7 @@ def test_load_incremental_time_range_strategy_required_only( def test_load_incremental_time_range_strategy_all_defined( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local", start="2025-01-01") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it incremental_time_range_contents = """ {{ @@ -532,7 +489,7 @@ def test_load_incremental_time_range_strategy_all_defined( snapshot = context.snapshots[snapshot_fqn] model = snapshot.model # Validate model-level attributes - assert model.start == "2025-01-01" + assert to_ds(model.start or "") == "2025-01-01" assert model.interval_unit.is_day # Validate model kind attributes assert isinstance(model.kind, IncrementalByTimeRangeKind) @@ -559,7 +516,7 @@ def test_load_incremental_time_range_strategy_all_defined( def test_load_deprecated_incremental_time_column( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local", start="2025-01-01") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it incremental_time_range_contents = """ {{ @@ -580,10 +537,10 @@ def test_load_deprecated_incremental_time_column( context = Context(paths=project_dir) model = context.snapshots[snapshot_fqn].model # Validate model-level attributes - assert model.start == "2025-01-01" + assert to_ds(model.start or "") == "2025-01-01" assert model.interval_unit.is_day # Validate model-level attributes - assert model.start == "2025-01-01" + assert to_ds(model.start or 
"") == "2025-01-01" assert model.interval_unit.is_day # Validate model kind attributes assert isinstance(model.kind, IncrementalByTimeRangeKind) @@ -606,7 +563,7 @@ def test_load_microbatch_with_ref( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: yaml = YAML() - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") source_schema = { "version": 2, "sources": [ @@ -672,7 +629,7 @@ def test_load_microbatch_with_ref_no_filter( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: yaml = YAML() - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") source_schema = { "version": 2, "sources": [ @@ -749,21 +706,6 @@ def test_load_multiple_snapshots_defined_in_same_file(sushi_test_dbt_context: Co def test_dbt_jinja_macro_undefined_variable_error(create_empty_project): project_dir, model_dir = create_empty_project() - dbt_profile_config = { - "test": { - "outputs": { - "duckdb": { - "type": "duckdb", - "path": str(project_dir.parent / "dbt_data" / "main.db"), - } - }, - "target": "duckdb", - } - } - db_profile_file = project_dir / "profiles.yml" - with open(db_profile_file, "w", encoding="utf-8") as f: - YAML().dump(dbt_profile_config, f) - macros_dir = project_dir / "macros" macros_dir.mkdir() @@ -801,6 +743,8 @@ def test_dbt_jinja_macro_undefined_variable_error(create_empty_project): @pytest.mark.slow def test_node_name_populated_for_dbt_models(dbt_dummy_postgres_config: PostgresConfig) -> None: model_config = ModelConfig( + unique_id="model.test_package.test_model", + fqn=["test_package", "test_model"], name="test_model", package_name="test_package", sql="SELECT 1 as id", @@ -815,7 +759,8 @@ def test_node_name_populated_for_dbt_models(dbt_dummy_postgres_config: PostgresC # check after convert to SQLMesh model that node_name is populated correctly sqlmesh_model = model_config.to_sqlmesh(context) - assert sqlmesh_model.dbt_name == "model.test_package.test_model" + assert sqlmesh_model.dbt_unique_id == "model.test_package.test_model" + assert sqlmesh_model.dbt_fqn == "test_package.test_model" @pytest.mark.slow @@ -872,12 +817,15 @@ def test_load_model_dbt_node_name(tmp_path: Path) -> None: # Verify that node_name is the equivalent dbt one model = context.snapshots[model_fqn].model - assert model.dbt_name == "model.test_project.simple_model" + assert model.dbt_unique_id == "model.test_project.simple_model" + assert model.dbt_fqn == "test_project.simple_model" + assert model.dbt_node_info + assert model.dbt_node_info.name == "simple_model" @pytest.mark.slow -def test_jinja_config_no_query(tmp_path, create_empty_project): - project_dir, model_dir = create_empty_project() +def test_jinja_config_no_query(create_empty_project): + project_dir, model_dir = create_empty_project(project_name="local") # model definition contains only a comment and non-SQL jinja model_contents = "/* comment */ {{ config(materialized='table') }}" diff --git a/tests/dbt/cli/fixtures/empty_project/dbt_project.yml b/tests/fixtures/dbt/empty_project/dbt_project.yml similarity index 94% rename from tests/dbt/cli/fixtures/empty_project/dbt_project.yml rename to tests/fixtures/dbt/empty_project/dbt_project.yml index beceadcd33..dab3d1e0e8 100644 --- a/tests/dbt/cli/fixtures/empty_project/dbt_project.yml +++ b/tests/fixtures/dbt/empty_project/dbt_project.yml @@ -1,7 +1,7 @@ name: 'empty_project' +version: '1.0.0' 
config-version: 2 -version: '0.1' profile: 'empty_project' diff --git a/tests/dbt/cli/fixtures/empty_project/profiles.yml b/tests/fixtures/dbt/empty_project/profiles.yml similarity index 85% rename from tests/dbt/cli/fixtures/empty_project/profiles.yml rename to tests/fixtures/dbt/empty_project/profiles.yml index a4f9836b7e..b352fc5792 100644 --- a/tests/dbt/cli/fixtures/empty_project/profiles.yml +++ b/tests/fixtures/dbt/empty_project/profiles.yml @@ -6,4 +6,4 @@ empty_project: duckdb: type: duckdb path: 'empty_project.duckdb' - threads: 4 + threads: 4 diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/dbt_project.yml b/tests/fixtures/dbt/jaffle_shop_duckdb/dbt_project.yml similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/dbt_project.yml rename to tests/fixtures/dbt/jaffle_shop_duckdb/dbt_project.yml diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/customers.sql b/tests/fixtures/dbt/jaffle_shop_duckdb/models/customers.sql similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/customers.sql rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/customers.sql diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/docs.md b/tests/fixtures/dbt/jaffle_shop_duckdb/models/docs.md similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/docs.md rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/docs.md diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/orders.sql b/tests/fixtures/dbt/jaffle_shop_duckdb/models/orders.sql similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/orders.sql rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/orders.sql diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/overview.md b/tests/fixtures/dbt/jaffle_shop_duckdb/models/overview.md similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/overview.md rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/overview.md diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/schema.yml b/tests/fixtures/dbt/jaffle_shop_duckdb/models/schema.yml similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/schema.yml rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/schema.yml diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/schema.yml b/tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/schema.yml similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/schema.yml rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/schema.yml diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_customers.sql b/tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_customers.sql similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_customers.sql rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_customers.sql diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_orders.sql b/tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_orders.sql similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_orders.sql rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_orders.sql diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_payments.sql b/tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_payments.sql similarity index 100% rename from 
tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_payments.sql rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_payments.sql diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/profiles.yml b/tests/fixtures/dbt/jaffle_shop_duckdb/profiles.yml similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/profiles.yml rename to tests/fixtures/dbt/jaffle_shop_duckdb/profiles.yml diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/.gitkeep b/tests/fixtures/dbt/jaffle_shop_duckdb/seeds/.gitkeep similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/.gitkeep rename to tests/fixtures/dbt/jaffle_shop_duckdb/seeds/.gitkeep diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_customers.csv b/tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_customers.csv similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_customers.csv rename to tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_customers.csv diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_orders.csv b/tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_orders.csv similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_orders.csv rename to tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_orders.csv diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_payments.csv b/tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_payments.csv similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_payments.csv rename to tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_payments.csv From 147a5bbdbcda5227500f5c068438de81342e9e1e Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Tue, 23 Sep 2025 08:09:05 -0700 Subject: [PATCH 009/173] Fix: Support seed model schema changes in dev-only VDE mode (#5419) --- sqlmesh/core/plan/common.py | 6 +++- sqlmesh/core/plan/stages.py | 2 +- sqlmesh/core/snapshot/evaluator.py | 18 ++++++++-- tests/core/test_integration.py | 56 ++++++++++++++++++++++++++++++ tests/core/test_plan.py | 1 + 5 files changed, 78 insertions(+), 5 deletions(-) diff --git a/sqlmesh/core/plan/common.py b/sqlmesh/core/plan/common.py index 2ae34fbfba..bece17639c 100644 --- a/sqlmesh/core/plan/common.py +++ b/sqlmesh/core/plan/common.py @@ -16,7 +16,11 @@ def should_force_rebuild(old: Snapshot, new: Snapshot) -> bool: if new.is_view and new.is_indirect_non_breaking and not new.is_forward_only: # View models always need to be rebuilt to reflect updated upstream dependencies return True - if new.is_seed and not new.is_metadata: + if new.is_seed and not ( + new.is_metadata + and new.previous_version + and new.previous_version.snapshot_id(new.name) == old.snapshot_id + ): # Seed models always need to be rebuilt to reflect changes in the seed file # Unless only their metadata has been updated (eg description added) and the seed file has not been touched return True diff --git a/sqlmesh/core/plan/stages.py b/sqlmesh/core/plan/stages.py index 9425608619..729e1705b4 100644 --- a/sqlmesh/core/plan/stages.py +++ b/sqlmesh/core/plan/stages.py @@ -268,7 +268,7 @@ def build(self, plan: EvaluatablePlan) -> t.List[PlanStage]: before_promote_snapshots = { s.snapshot_id for s in snapshots.values() - if deployability_index.is_representative(s) + if (deployability_index.is_representative(s) or s.is_seed) and plan.is_selected_for_backfill(s.name) } after_promote_snapshots = all_selected_for_backfill_snapshots - before_promote_snapshots diff --git 
a/sqlmesh/core/snapshot/evaluator.py b/sqlmesh/core/snapshot/evaluator.py index baf4dd67f1..70cc31b0a4 100644 --- a/sqlmesh/core/snapshot/evaluator.py +++ b/sqlmesh/core/snapshot/evaluator.py @@ -1141,10 +1141,10 @@ def _migrate_target_table( ) -> None: adapter = self.get_adapter(snapshot.model.gateway) - target_table = exp.to_table(target_table_name) - target_table.this.set("this", f"{target_table.name}_schema_tmp") + tmp_table = exp.to_table(target_table_name) + tmp_table.this.set("this", f"{tmp_table.name}_schema_tmp") + tmp_table_name = tmp_table.sql() - tmp_table_name = target_table.sql() if snapshot.is_materialized: self._execute_create( snapshot=snapshot, @@ -2185,6 +2185,18 @@ def create( self.adapter.drop_table(table_name) raise + def migrate( + self, + target_table_name: str, + source_table_name: str, + snapshot: Snapshot, + *, + ignore_destructive: bool, + ignore_additive: bool, + **kwargs: t.Any, + ) -> None: + raise NotImplementedError("Seeds do not support migrations.") + def insert( self, table_name: str, diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py index a3f9584aa3..bac495a5f1 100644 --- a/tests/core/test_integration.py +++ b/tests/core/test_integration.py @@ -3232,6 +3232,62 @@ def test_virtual_environment_mode_dev_only_model_change_standalone_audit( context.apply(plan) +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_seed_model_change_schema( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.apply(plan) + + new_csv = [] + with open(context.path / "seeds" / "waiter_names.csv", "r") as fd: + is_header = True + for idx, line in enumerate(fd): + line = line.strip() + if not line: + continue + if is_header: + new_csv.append(line + ",new_column") + is_header = False + else: + new_csv.append(line + f",v{idx}") + + with open(context.path / "seeds" / "waiter_names.csv", "w") as fd: + fd.write("\n".join(new_csv)) + + context.load() + + downstream_model = context.get_model("sushi.waiter_as_customer_by_day") + downstream_model_kind = downstream_model.kind.dict() + downstream_model_kwargs = { + **downstream_model.dict(), + "kind": { + **downstream_model_kind, + "on_destructive_change": "allow", + }, + "audits": [], + # Use the new column + "query": "SELECT '2023-01-07' AS event_date, new_column AS new_column FROM sushi.waiter_names", + } + context.upsert_model(SqlModel.parse_obj(downstream_model_kwargs)) + + context.plan("dev", auto_apply=True, no_prompts=True, skip_tests=True, enable_preview=True) + + assert ( + context.engine_adapter.fetchone( + "SELECT COUNT(*) FROM sushi__dev.waiter_as_customer_by_day" + )[0] + == len(new_csv) - 1 + ) + + # Deploy to prod + context.clear_caches() + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + assert "new_column" in context.engine_adapter.columns("sushi.waiter_as_customer_by_day") + + @time_machine.travel("2023-01-08 15:00:00 UTC") def test_restatement_plan_ignores_changes(init_and_plan_context: t.Callable): context, plan = init_and_plan_context("examples/sushi") diff --git a/tests/core/test_plan.py b/tests/core/test_plan.py index 59bc91d1bf..40967f1fbe 100644 --- a/tests/core/test_plan.py +++ b/tests/core/test_plan.py @@ -1214,6 +1214,7 @@ def test_seed_model_metadata_change_no_missing_intervals( description="foo", ) ) + snapshot_a_metadata_updated.previous_versions = snapshot_a.all_versions assert 
snapshot_a_metadata_updated.version is None assert snapshot_a_metadata_updated.change_category is None From 1fb5010147e27629103bf2873c12ae13fd6a5f78 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Tue, 23 Sep 2025 19:21:00 +0300 Subject: [PATCH 010/173] Chore!: bump sqlglot to v27.17.0 (#5429) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1d34b340b5..59880e61c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "requests", "rich[jupyter]", "ruamel.yaml", - "sqlglot[rs]~=27.16.3", + "sqlglot[rs]~=27.17.0", "tenacity", "time-machine", "json-stream" From 131a4db8398c3aab180b886e35844023f86b7641 Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Tue, 23 Sep 2025 13:17:15 -0700 Subject: [PATCH 011/173] fix: bigquery snowflake source columns support (#5428) --- sqlmesh/core/engine_adapter/bigquery.py | 9 +++++---- sqlmesh/core/engine_adapter/snowflake.py | 14 ++++++++------ tests/core/engine_adapter/test_bigquery.py | 8 +++++++- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/sqlmesh/core/engine_adapter/bigquery.py b/sqlmesh/core/engine_adapter/bigquery.py index 00b33f67a5..0dfa2325e8 100644 --- a/sqlmesh/core/engine_adapter/bigquery.py +++ b/sqlmesh/core/engine_adapter/bigquery.py @@ -169,17 +169,18 @@ def _df_to_source_queries( ) def query_factory() -> Query: - if bigframes_pd and isinstance(df, bigframes_pd.DataFrame): - df.to_gbq( + ordered_df = df[list(source_columns_to_types)] + if bigframes_pd and isinstance(ordered_df, bigframes_pd.DataFrame): + ordered_df.to_gbq( f"{temp_bq_table.project}.{temp_bq_table.dataset_id}.{temp_bq_table.table_id}", if_exists="replace", ) elif not self.table_exists(temp_table): # Make mypy happy - assert isinstance(df, pd.DataFrame) + assert isinstance(ordered_df, pd.DataFrame) self._db_call(self.client.create_table, table=temp_bq_table, exists_ok=False) result = self.__load_pandas_to_table( - temp_bq_table, df, source_columns_to_types, replace=False + temp_bq_table, ordered_df, source_columns_to_types, replace=False ) if result.errors: raise SQLMeshError(result.errors) diff --git a/sqlmesh/core/engine_adapter/snowflake.py b/sqlmesh/core/engine_adapter/snowflake.py index 355fb9719c..9c27b45115 100644 --- a/sqlmesh/core/engine_adapter/snowflake.py +++ b/sqlmesh/core/engine_adapter/snowflake.py @@ -378,6 +378,8 @@ def query_factory() -> Query: elif isinstance(df, pd.DataFrame): from snowflake.connector.pandas_tools import write_pandas + ordered_df = df[list(source_columns_to_types)] + # Workaround for https://github.com/snowflakedb/snowflake-connector-python/issues/1034 # The above issue has already been fixed upstream, but we keep the following # line anyway in order to support a wider range of Snowflake versions. 
@@ -388,16 +390,16 @@ def query_factory() -> Query: # See: https://stackoverflow.com/a/75627721 for column, kind in source_columns_to_types.items(): - if is_datetime64_any_dtype(df.dtypes[column]): + if is_datetime64_any_dtype(ordered_df.dtypes[column]): if kind.is_type("date"): # type: ignore - df[column] = pd.to_datetime(df[column]).dt.date # type: ignore - elif getattr(df.dtypes[column], "tz", None) is not None: # type: ignore - df[column] = pd.to_datetime(df[column]).dt.strftime( + ordered_df[column] = pd.to_datetime(ordered_df[column]).dt.date # type: ignore + elif getattr(ordered_df.dtypes[column], "tz", None) is not None: # type: ignore + ordered_df[column] = pd.to_datetime(ordered_df[column]).dt.strftime( "%Y-%m-%d %H:%M:%S.%f%z" ) # type: ignore # https://github.com/snowflakedb/snowflake-connector-python/issues/1677 else: # type: ignore - df[column] = pd.to_datetime(df[column]).dt.strftime( + ordered_df[column] = pd.to_datetime(ordered_df[column]).dt.strftime( "%Y-%m-%d %H:%M:%S.%f" ) # type: ignore @@ -407,7 +409,7 @@ def query_factory() -> Query: write_pandas( self._connection_pool.get(), - df, + ordered_df, temp_table.name, schema=temp_table.db or None, database=database.sql(dialect=self.dialect) if database else None, diff --git a/tests/core/engine_adapter/test_bigquery.py b/tests/core/engine_adapter/test_bigquery.py index 4328fa8923..f195bbaa2a 100644 --- a/tests/core/engine_adapter/test_bigquery.py +++ b/tests/core/engine_adapter/test_bigquery.py @@ -487,7 +487,13 @@ def temp_table_exists(table: exp.Table) -> bool: retry_resp_call.errors = None retry_mock.return_value = retry_resp db_call_mock.return_value = AttributeDict({"errors": None}) - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = pd.DataFrame( + { + "id": [1, 2, 3], + "ts": ["2025-01-01 00:00:00", "2025-01-01 00:00:00", "2025-01-01 00:00:00"], + "val": [7, 8, 9], + } + ) adapter.merge( target_table="target", source_table=df, From 97c6a127ed36b448666d495b89f526a60f69f985 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Wed, 24 Sep 2025 10:40:13 +1200 Subject: [PATCH 012/173] Fix: Sort nested AttributeDict's to prevent visual diff (#5397) --- sqlmesh/utils/jinja.py | 16 +++++++++++++++- tests/utils/test_jinja.py | 27 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/sqlmesh/utils/jinja.py b/sqlmesh/utils/jinja.py index c9339cf404..508c6dce2d 100644 --- a/sqlmesh/utils/jinja.py +++ b/sqlmesh/utils/jinja.py @@ -206,6 +206,20 @@ def extract_macro_references_and_variables( return macro_references, variables +def sort_dict_recursive( + item: t.Dict[str, t.Any], +) -> t.Dict[str, t.Any]: + sorted_dict: t.Dict[str, t.Any] = {} + for k, v in sorted(item.items()): + if isinstance(v, list): + sorted_dict[k] = sorted(v) + elif isinstance(v, dict): + sorted_dict[k] = sort_dict_recursive(v) + else: + sorted_dict[k] = v + return sorted_dict + + JinjaGlobalAttribute = t.Union[str, int, float, bool, AttributeDict] @@ -440,7 +454,7 @@ def to_expressions(self) -> t.List[Expression]: d.PythonCode( expressions=[ f"{k} = '{v}'" if isinstance(v, str) else f"{k} = {v}" - for k, v in sorted(filtered_objs.items()) + for k, v in sort_dict_recursive(filtered_objs).items() ] ) ) diff --git a/tests/utils/test_jinja.py b/tests/utils/test_jinja.py index 5eb00aeb3c..1cf7c1bf95 100644 --- a/tests/utils/test_jinja.py +++ b/tests/utils/test_jinja.py @@ -302,3 +302,30 @@ def test_dbt_adapter_macro_scope(): rendered = registry.build_environment().from_string("{{ spark__macro_a() 
}}").render() assert rendered.strip() == "macro_a" + + +def test_macro_registry_to_expressions_sorted(): + refs = AttributeDict( + { + "payments": { + "database": "jaffle_shop", + "schema": "main", + "nested": {"foo": "bar", "baz": "bing"}, + }, + "orders": {"schema": "main", "database": "jaffle_shop", "nested_list": ["b", "a", "c"]}, + } + ) + + registry = JinjaMacroRegistry() + registry.add_globals({"sources": {}, "refs": refs}) + + # Ensure that the AttributeDict string representation is sorted + # in order to prevent an unexpected *visual* diff in ModelDiff + # (note that the actual diff is based on the data hashes, so this is purely visual) + expressions = registry.to_expressions() + assert len(expressions) == 1 + assert ( + expressions[0].sql(dialect="duckdb") + == "refs = {'orders': {'database': 'jaffle_shop', 'nested_list': ['a', 'b', 'c'], 'schema': 'main'}, 'payments': {'database': 'jaffle_shop', 'nested': {'baz': 'bing', 'foo': 'bar'}, 'schema': 'main'}}\n" + "sources = {}" + ) From e7e4841eccf7feece5fb0d9ed7f8580c00e897a1 Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Tue, 23 Sep 2025 18:19:15 -0700 Subject: [PATCH 013/173] Chore: Break up the core integration tests (#5432) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- Makefile | 6 +- pyproject.toml | 1 + tests/conftest.py | 10 +- tests/core/integration/__init__.py | 0 tests/core/integration/conftest.py | 8 + tests/core/integration/test_audits.py | 348 + .../core/integration/test_auto_restatement.py | 219 + tests/core/integration/test_aux_commands.py | 367 + .../core/integration/test_change_scenarios.py | 1484 +++ tests/core/integration/test_config.py | 580 + tests/core/integration/test_cron.py | 247 + tests/core/integration/test_dbt.py | 125 + tests/core/integration/test_dev_only_vde.py | 477 + tests/core/integration/test_forward_only.py | 1510 +++ tests/core/integration/test_model_kinds.py | 2644 ++++ tests/core/integration/test_multi_repo.py | 456 + tests/core/integration/test_plan_options.py | 478 + tests/core/integration/test_restatement.py | 1882 +++ tests/core/integration/test_run.py | 247 + tests/core/integration/utils.py | 350 + tests/core/test_dialect.py | 2 + tests/core/test_integration.py | 10887 ---------------- 22 files changed, 11437 insertions(+), 10891 deletions(-) create mode 100644 tests/core/integration/__init__.py create mode 100644 tests/core/integration/conftest.py create mode 100644 tests/core/integration/test_audits.py create mode 100644 tests/core/integration/test_auto_restatement.py create mode 100644 tests/core/integration/test_aux_commands.py create mode 100644 tests/core/integration/test_change_scenarios.py create mode 100644 tests/core/integration/test_config.py create mode 100644 tests/core/integration/test_cron.py create mode 100644 tests/core/integration/test_dbt.py create mode 100644 tests/core/integration/test_dev_only_vde.py create mode 100644 tests/core/integration/test_forward_only.py create mode 100644 tests/core/integration/test_model_kinds.py create mode 100644 tests/core/integration/test_multi_repo.py create mode 100644 tests/core/integration/test_plan_options.py create mode 100644 tests/core/integration/test_restatement.py create mode 100644 tests/core/integration/test_run.py create mode 100644 tests/core/integration/utils.py delete mode 100644 tests/core/test_integration.py diff --git a/Makefile b/Makefile index 40874f7972..fbf77b8f9b 100644 --- a/Makefile +++ b/Makefile @@ -117,13 +117,13 @@ engine-up: engine-clickhouse-up 
engine-mssql-up engine-mysql-up engine-postgres- engine-down: engine-clickhouse-down engine-mssql-down engine-mysql-down engine-postgres-down engine-spark-down engine-trino-down fast-test: - pytest -n auto -m "fast and not cicdonly" --junitxml=test-results/junit-fast-test.xml && pytest -m "isolated" && pytest -m "registry_isolation" + pytest -n auto -m "fast and not cicdonly" --junitxml=test-results/junit-fast-test.xml && pytest -m "isolated" && pytest -m "registry_isolation" && pytest -m "dialect_isolated" slow-test: - pytest -n auto -m "(fast or slow) and not cicdonly" && pytest -m "isolated" && pytest -m "registry_isolation" + pytest -n auto -m "(fast or slow) and not cicdonly" && pytest -m "isolated" && pytest -m "registry_isolation" && pytest -m "dialect_isolated" cicd-test: - pytest -n auto -m "fast or slow" --junitxml=test-results/junit-cicd.xml && pytest -m "isolated" && pytest -m "registry_isolation" + pytest -n auto -m "fast or slow" --junitxml=test-results/junit-cicd.xml && pytest -m "isolated" && pytest -m "registry_isolation" && pytest -m "dialect_isolated" core-fast-test: pytest -n auto -m "fast and not web and not github and not dbt and not jupyter" diff --git a/pyproject.toml b/pyproject.toml index 59880e61c5..b3e13b63ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -241,6 +241,7 @@ markers = [ "remote: test that involves interacting with a remote DB", "cicdonly: test that only runs on CI/CD", "isolated: tests that need to run sequentially usually because they use fork", + "dialect_isolated: tests that need to run separately due to global dialect overrides", # Test Domain Markers # default: core functionality diff --git a/tests/conftest.py b/tests/conftest.py index e5bbc4f425..7a61281ad0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -202,7 +202,15 @@ def validate( def pytest_collection_modifyitems(items, *args, **kwargs): - test_type_markers = {"fast", "slow", "docker", "remote", "isolated", "registry_isolation"} + test_type_markers = { + "fast", + "slow", + "docker", + "remote", + "isolated", + "registry_isolation", + "dialect_isolated", + } for item in items: for marker in item.iter_markers(): if marker.name in test_type_markers: diff --git a/tests/core/integration/__init__.py b/tests/core/integration/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/core/integration/conftest.py b/tests/core/integration/conftest.py new file mode 100644 index 0000000000..99875e5974 --- /dev/null +++ b/tests/core/integration/conftest.py @@ -0,0 +1,8 @@ +import pytest +from pytest_mock.plugin import MockerFixture + + +@pytest.fixture(autouse=True) +def mock_choices(mocker: MockerFixture): + mocker.patch("sqlmesh.core.console.TerminalConsole._get_snapshot_change_category") + mocker.patch("sqlmesh.core.console.TerminalConsole._prompt_backfill") diff --git a/tests/core/integration/test_audits.py b/tests/core/integration/test_audits.py new file mode 100644 index 0000000000..457974fdac --- /dev/null +++ b/tests/core/integration/test_audits.py @@ -0,0 +1,348 @@ +from __future__ import annotations + +import typing as t +from textwrap import dedent +import pytest +from pathlib import Path +import time_machine +from sqlglot import exp +from IPython.utils.capture import capture_output + +from sqlmesh.core.config import ( + Config, + ModelDefaultsConfig, +) +from sqlmesh.core.context import Context +from sqlmesh.utils.errors import ( + PlanError, +) +from tests.utils.test_helpers import use_terminal_console +from tests.utils.test_filesystem import 
create_temp_file + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +@use_terminal_console +def test_audit_only_metadata_change(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Add a new audit + model = context.get_model("sushi.waiter_revenue_by_day") + audits = model.audits.copy() + audits.append(("number_of_rows", {"threshold": exp.Literal.number(1)})) + model = model.copy(update={"audits": audits}) + context.upsert_model(model) + + plan = context.plan_builder("prod", skip_tests=True).build() + assert len(plan.new_snapshots) == 2 + assert all(s.change_category.is_metadata for s in plan.new_snapshots) + assert not plan.missing_intervals + + with capture_output() as output: + context.apply(plan) + + assert "Auditing models" in output.stdout + assert model.name in output.stdout + + +@use_terminal_console +def test_audits_running_on_metadata_changes(tmp_path: Path): + def setup_scenario(model_before: str, model_after: str): + models_dir = Path("models") + create_temp_file(tmp_path, models_dir / "test.sql", model_before) + + # Create first snapshot + context = Context(paths=tmp_path, config=Config()) + context.plan("prod", no_prompts=True, auto_apply=True) + + # Create second (metadata) snapshot + create_temp_file(tmp_path, models_dir / "test.sql", model_after) + context.load() + + with capture_output() as output: + with pytest.raises(PlanError): + context.plan("prod", no_prompts=True, auto_apply=True) + + assert 'Failed models\n\n "model"' in output.stdout + + return output + + # Ensure incorrect audits (bad data, incorrect definition etc.) are evaluated immediately + output = setup_scenario( + "MODEL (name model); SELECT NULL AS col", + "MODEL (name model, audits (not_null(columns=[col]))); SELECT NULL AS col", + ) + assert "'not_null' audit error: 1 row failed" in output.stdout + + output = setup_scenario( + "MODEL (name model); SELECT NULL AS col", + "MODEL (name model, audits (not_null(columns=[this_col_does_not_exist]))); SELECT NULL AS col", + ) + assert ( + 'Binder Error: Referenced column "this_col_does_not_exist" not found in \nFROM clause!'
+ in output.stdout + ) + + +@pytest.mark.slow +def test_default_audits_applied_in_plan(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir(exist_ok=True) + + # Create a model with data that will pass the audits + create_temp_file( + tmp_path, + models_dir / "orders.sql", + dedent(""" + MODEL ( + name test.orders, + kind FULL + ); + + SELECT + 1 AS order_id, + 'customer_1' AS customer_id, + 100.50 AS amount, + '2024-01-01'::DATE AS order_date + UNION ALL + SELECT + 2 AS order_id, + 'customer_2' AS customer_id, + 200.75 AS amount, + '2024-01-02'::DATE AS order_date + """), + ) + + config = Config( + model_defaults=ModelDefaultsConfig( + dialect="duckdb", + audits=[ + "not_null(columns := [order_id, customer_id])", + "unique_values(columns := [order_id])", + ], + ) + ) + + context = Context(paths=tmp_path, config=config) + + # Create and apply plan, here audits should pass + plan = context.plan("prod", no_prompts=True) + context.apply(plan) + + # Verify model has the default audits + model = context.get_model("test.orders") + assert len(model.audits) == 2 + + audit_names = [audit[0] for audit in model.audits] + assert "not_null" in audit_names + assert "unique_values" in audit_names + + # Verify audit arguments are preserved + for audit_name, audit_args in model.audits: + if audit_name == "not_null": + assert "columns" in audit_args + columns = [col.name for col in audit_args["columns"].expressions] + assert "order_id" in columns + assert "customer_id" in columns + elif audit_name == "unique_values": + assert "columns" in audit_args + columns = [col.name for col in audit_args["columns"].expressions] + assert "order_id" in columns + + +@pytest.mark.slow +def test_default_audits_fail_on_bad_data(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir(exist_ok=True) + + # Create a model with data that violates NOT NULL constraint + create_temp_file( + tmp_path, + models_dir / "bad_orders.sql", + dedent(""" + MODEL ( + name test.bad_orders, + kind FULL + ); + + SELECT + 1 AS order_id, + NULL AS customer_id, -- This violates NOT NULL + 100.50 AS amount, + '2024-01-01'::DATE AS order_date + UNION ALL + SELECT + 2 AS order_id, + 'customer_2' AS customer_id, + 200.75 AS amount, + '2024-01-02'::DATE AS order_date + """), + ) + + config = Config( + model_defaults=ModelDefaultsConfig( + dialect="duckdb", audits=["not_null(columns := [customer_id])"] + ) + ) + + context = Context(paths=tmp_path, config=config) + + # Plan should fail due to audit failure + with pytest.raises(PlanError): + context.plan("prod", no_prompts=True, auto_apply=True) + + +@pytest.mark.slow +def test_default_audits_with_model_specific_audits(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir(exist_ok=True) + audits_dir = tmp_path / "audits" + audits_dir.mkdir(exist_ok=True) + + create_temp_file( + tmp_path, + audits_dir / "range_check.sql", + dedent(""" + AUDIT ( + name range_check + ); + + SELECT * FROM @this_model + WHERE @column < @min_value OR @column > @max_value + """), + ) + + # Create a model with its own audits in addition to defaults + create_temp_file( + tmp_path, + models_dir / "products.sql", + dedent(""" + MODEL ( + name test.products, + kind FULL, + audits ( + range_check(column := price, min_value := 0, max_value := 10000) + ) + ); + + SELECT + 1 AS product_id, + 'Widget' AS product_name, + 99.99 AS price + UNION ALL + SELECT + 2 AS product_id, + 'Gadget' AS product_name, + 149.99 AS price + """), + ) + + config = Config( + model_defaults=ModelDefaultsConfig( + 
dialect="duckdb", + audits=[ + "not_null(columns := [product_id, product_name])", + "unique_values(columns := [product_id])", + ], + ) + ) + + context = Context(paths=tmp_path, config=config) + + # Create and apply plan + plan = context.plan("prod", no_prompts=True) + context.apply(plan) + + # Verify model has both default and model-specific audits + model = context.get_model("test.products") + assert len(model.audits) == 3 + + audit_names = [audit[0] for audit in model.audits] + assert "not_null" in audit_names + assert "unique_values" in audit_names + assert "range_check" in audit_names + + # Verify audit execution order, default audits first then model-specific + assert model.audits[0][0] == "not_null" + assert model.audits[1][0] == "unique_values" + assert model.audits[2][0] == "range_check" + + +@pytest.mark.slow +def test_default_audits_with_custom_audit_definitions(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir(exist_ok=True) + audits_dir = tmp_path / "audits" + audits_dir.mkdir(exist_ok=True) + + # Create custom audit definition + create_temp_file( + tmp_path, + audits_dir / "positive_amount.sql", + dedent(""" + AUDIT ( + name positive_amount + ); + + SELECT * FROM @this_model + WHERE @column <= 0 + """), + ) + + # Create a model + create_temp_file( + tmp_path, + models_dir / "transactions.sql", + dedent(""" + MODEL ( + name test.transactions, + kind FULL + ); + + SELECT + 1 AS transaction_id, + 'TXN001' AS transaction_code, + 250.00 AS amount, + '2024-01-01'::DATE AS transaction_date + UNION ALL + SELECT + 2 AS transaction_id, + 'TXN002' AS transaction_code, + 150.00 AS amount, + '2024-01-02'::DATE AS transaction_date + """), + ) + + config = Config( + model_defaults=ModelDefaultsConfig( + dialect="duckdb", + audits=[ + "not_null(columns := [transaction_id, transaction_code])", + "unique_values(columns := [transaction_id])", + "positive_amount(column := amount)", + ], + ) + ) + + context = Context(paths=tmp_path, config=config) + + # Create and apply plan + plan = context.plan("prod", no_prompts=True) + context.apply(plan) + + # Verify model has all default audits including custom + model = context.get_model("test.transactions") + assert len(model.audits) == 3 + + audit_names = [audit[0] for audit in model.audits] + assert "not_null" in audit_names + assert "unique_values" in audit_names + assert "positive_amount" in audit_names + + # Verify custom audit arguments + for audit_name, audit_args in model.audits: + if audit_name == "positive_amount": + assert "column" in audit_args + assert audit_args["column"].name == "amount" diff --git a/tests/core/integration/test_auto_restatement.py b/tests/core/integration/test_auto_restatement.py new file mode 100644 index 0000000000..70ca227fd3 --- /dev/null +++ b/tests/core/integration/test_auto_restatement.py @@ -0,0 +1,219 @@ +from __future__ import annotations + +import typing as t +import pandas as pd # noqa: TID253 +import pytest +import time_machine +from sqlglot import exp + +from sqlmesh.core import dialect as d +from sqlmesh.core.macros import macro +from sqlmesh.core.model import ( + load_sql_based_model, +) +from sqlmesh.core.plan import SnapshotIntervals +from sqlmesh.utils.date import to_timestamp + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 01:00:00 UTC") +def test_run_auto_restatement(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + context.engine_adapter.execute( + "CREATE TABLE _test_auto_restatement_intervals (name STRING, 
start_ds STRING, end_ds STRING)" + ) + + @macro() + def record_intervals( + evaluator, name: exp.Expression, start: exp.Expression, end: exp.Expression, **kwargs: t.Any + ) -> None: + if evaluator.runtime_stage == "evaluating": + evaluator.engine_adapter.insert_append( + "_test_auto_restatement_intervals", + pd.DataFrame({"name": [name.name], "start_ds": [start.name], "end_ds": [end.name]}), + ) + + new_model_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + auto_restatement_cron '0 6 * * 7', -- At 6am every Sunday + auto_restatement_intervals 3, + ), + start '2023-01-01', + ); + + @record_intervals('new_model', @start_ds, @end_ds); + + SELECT '2023-01-07' AS ds, 1 AS a; + """ + ) + new_model = load_sql_based_model(new_model_expr) + context.upsert_model(new_model) + + new_model_downstream_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model_downstream, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + ), + cron '@hourly', + ); + + @record_intervals('new_model_downstream', @start_ts, @end_ts); + + SELECT * FROM memory.sushi.new_model; + """ + ) + new_model_downstream = load_sql_based_model(new_model_downstream_expr) + context.upsert_model(new_model_downstream) + + plan = context.plan_builder("prod").build() + context.apply(plan) + + with time_machine.travel("2023-01-08 06:01:00 UTC"): + assert context.run() + + recorded_intervals_df = context.engine_adapter.fetchdf( + "SELECT start_ds, end_ds FROM _test_auto_restatement_intervals WHERE name = 'new_model'" + ) + # The first interval is the first backfill and the second interval should be the 3 auto restated intervals + assert recorded_intervals_df.to_dict() == { + "start_ds": {0: "2023-01-01", 1: "2023-01-05"}, + "end_ds": {0: "2023-01-07", 1: "2023-01-07"}, + } + recorded_intervals_downstream_df = context.engine_adapter.fetchdf( + "SELECT start_ds, end_ds FROM _test_auto_restatement_intervals WHERE name = 'new_model_downstream'" + ) + # The first interval is the first backfill, the second interval should be the 3 days of restated intervals, and + # the third interval should catch up to the current hour + assert recorded_intervals_downstream_df.to_dict() == { + "start_ds": { + 0: "2023-01-01 00:00:00", + 1: "2023-01-05 00:00:00", + 2: "2023-01-08 01:00:00", + }, + "end_ds": { + 0: "2023-01-08 00:59:59.999999", + 1: "2023-01-07 23:59:59.999999", + 2: "2023-01-08 05:59:59.999999", + }, + } + + snapshot = context.get_snapshot(new_model.name) + snapshot = context.state_sync.state_sync.get_snapshots([snapshot.snapshot_id])[ + snapshot.snapshot_id + ] + assert snapshot.next_auto_restatement_ts == to_timestamp("2023-01-15 06:00:00") + assert not snapshot.pending_restatement_intervals + + snapshot_downstream = context.get_snapshot(new_model_downstream.name) + snapshot_downstream = context.state_sync.state_sync.get_snapshots( + [snapshot_downstream.snapshot_id] + )[snapshot_downstream.snapshot_id] + assert not snapshot_downstream.next_auto_restatement_ts + assert not snapshot_downstream.pending_restatement_intervals + + +@time_machine.travel("2023-01-08 01:00:00 UTC") +def test_run_auto_restatement_plan_preview(init_and_plan_context: t.Callable): + context, init_plan = init_and_plan_context("examples/sushi") + context.apply(init_plan) + + new_model_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + auto_restatement_cron '0 6 * * 7', + ), + start '2023-01-01', + ); + + SELECT '2023-01-07' AS ds, 1 AS a; + 
""" + ) + new_model = load_sql_based_model(new_model_expr) + context.upsert_model(new_model) + snapshot = context.get_snapshot(new_model.name) + + plan_dev = context.plan_builder("dev").build() + # Make sure that a limited preview is computed by default + assert to_timestamp(plan_dev.start) == to_timestamp("2023-01-07") + assert plan_dev.missing_intervals == [ + SnapshotIntervals( + snapshot.snapshot_id, + [(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], + ) + ] + assert not plan_dev.deployability_index.is_deployable(snapshot.snapshot_id) + context.apply(plan_dev) + + plan_prod = context.plan_builder("prod").build() + assert plan_prod.missing_intervals == [ + SnapshotIntervals( + context.get_snapshot(new_model.name).snapshot_id, + [ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ) + ] + context.apply(plan_prod) + + +@time_machine.travel("2023-01-08 01:00:00 UTC") +def test_run_auto_restatement_failure(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + @macro() + def fail_auto_restatement(evaluator, start: exp.Expression, **kwargs: t.Any) -> None: + if evaluator.runtime_stage == "evaluating" and start.name != "2023-01-01": + raise Exception("Failed") + + new_model_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + auto_restatement_cron '0 6 * * 7', -- At 6am every Sunday + auto_restatement_intervals 3, + ), + start '2023-01-01', + ); + + @fail_auto_restatement(@start_ds); + + SELECT '2023-01-07' AS ds, 1 AS a; + """ + ) + new_model = load_sql_based_model(new_model_expr) + context.upsert_model(new_model) + + plan = context.plan_builder("prod").build() + context.apply(plan) + + with time_machine.travel("2023-01-08 06:01:00 UTC"): + run_status = context.run() + assert run_status.is_failure + + snapshot = context.get_snapshot(new_model.name) + snapshot = context.state_sync.state_sync.get_snapshots([snapshot.snapshot_id])[ + snapshot.snapshot_id + ] + assert snapshot.next_auto_restatement_ts == to_timestamp("2023-01-15 06:00:00") + assert snapshot.pending_restatement_intervals == [ + (to_timestamp("2023-01-05"), to_timestamp("2023-01-08")) + ] diff --git a/tests/core/integration/test_aux_commands.py b/tests/core/integration/test_aux_commands.py new file mode 100644 index 0000000000..ecdd3e05fc --- /dev/null +++ b/tests/core/integration/test_aux_commands.py @@ -0,0 +1,367 @@ +from __future__ import annotations + +import typing as t +from unittest.mock import patch +import pytest +from pathlib import Path +from sqlmesh.core.config.naming import NameInferenceConfig +from sqlmesh.core.model.common import ParsableSql +import time_machine +from pytest_mock.plugin import MockerFixture + +from sqlmesh.core.config import ( + Config, + GatewayConfig, + ModelDefaultsConfig, + DuckDBConnectionConfig, +) +from sqlmesh.core.context import Context +from sqlmesh.core.model import ( + SqlModel, +) +from sqlmesh.utils.errors import ( + SQLMeshError, +) +from sqlmesh.utils.date import now +from tests.conftest import DuckDBMetadata +from tests.utils.test_helpers import use_terminal_console +from tests.utils.test_filesystem 
import create_temp_file
+from tests.core.integration.utils import add_projection_to_model, apply_to_environment
+
+pytestmark = pytest.mark.slow
+
+
+@time_machine.travel("2023-01-08 15:00:00 UTC")
+def test_table_name(init_and_plan_context: t.Callable):
+    context, plan = init_and_plan_context("examples/sushi")
+    context.apply(plan)
+
+    snapshot = context.get_snapshot("sushi.waiter_revenue_by_day")
+    assert snapshot
+    assert (
+        context.table_name("sushi.waiter_revenue_by_day", "prod")
+        == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{snapshot.version}"
+    )
+
+    with pytest.raises(SQLMeshError, match="Environment 'dev' was not found."):
+        context.table_name("sushi.waiter_revenue_by_day", "dev")
+
+    with pytest.raises(
+        SQLMeshError, match="Model 'sushi.missing' was not found in environment 'prod'."
+    ):
+        context.table_name("sushi.missing", "prod")
+
+    # Add a new projection
+    model = context.get_model("sushi.waiter_revenue_by_day")
+    context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)))
+
+    context.plan("dev_a", auto_apply=True, no_prompts=True, skip_tests=True)
+
+    new_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day")
+    assert new_snapshot.version != snapshot.version
+
+    assert (
+        context.table_name("sushi.waiter_revenue_by_day", "dev_a")
+        == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{new_snapshot.version}"
+    )
+
+    # Make a forward-only change
+    context.upsert_model(model, stamp="forward_only")
+
+    context.plan("dev_b", auto_apply=True, no_prompts=True, skip_tests=True, forward_only=True)
+
+    forward_only_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day")
+    assert forward_only_snapshot.version == snapshot.version
+    assert forward_only_snapshot.dev_version != snapshot.version
+
+    assert (
+        context.table_name("sushi.waiter_revenue_by_day", "dev_b")
+        == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{forward_only_snapshot.dev_version}__dev"
+    )
+
+    assert (
+        context.table_name("sushi.waiter_revenue_by_day", "dev_b", prod=True)
+        == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{snapshot.version}"
+    )
+
+
+def test_janitor_cleanup_order(mocker: MockerFixture, tmp_path: Path):
+    def setup_scenario():
+        models_dir = tmp_path / "models"
+
+        if not models_dir.exists():
+            models_dir.mkdir()
+
+        model1_path = models_dir / "model1.sql"
+
+        with open(model1_path, "w") as f:
+            f.write("MODEL(name test.model1, kind FULL); SELECT 1 AS col")
+
+        config = Config(
+            model_defaults=ModelDefaultsConfig(dialect="duckdb"),
+        )
+        ctx = Context(paths=[tmp_path], config=config)
+
+        ctx.plan("dev", no_prompts=True, auto_apply=True)
+
+        model1_snapshot = ctx.get_snapshot("test.model1")
+
+        # Delete the model file to cause a snapshot expiration
+        model1_path.unlink()
+
+        ctx.load()
+
+        ctx.plan("dev", no_prompts=True, auto_apply=True)
+
+        # Invalidate the environment to cause an environment cleanup
+        ctx.invalidate_environment("dev")
+
+        try:
+            ctx._run_janitor(ignore_ttl=True)
+        except Exception:
+            pass
+
+        return ctx, model1_snapshot
+
+    # Case 1: Assume that the snapshot cleanup raises an error; the snapshot records
+    # should still exist in the state sync so that the next janitor run can retry
+    mocker.patch(
+        "sqlmesh.core.snapshot.evaluator.SnapshotEvaluator.cleanup",
+        side_effect=Exception("snapshot cleanup error"),
+    )
+    ctx, model1_snapshot = setup_scenario()
+
+    # - Check that the snapshot record exists in the state sync
+    state_snapshot = ctx.state_sync.state_sync.get_snapshots([model1_snapshot.snapshot_id])
+    assert state_snapshot
+
+    # - Run the janitor again; this time it should succeed
+    mocker.patch("sqlmesh.core.snapshot.evaluator.SnapshotEvaluator.cleanup")
+    ctx._run_janitor(ignore_ttl=True)
+
+    # - Check that the snapshot record does not exist in the state sync anymore
+    state_snapshot = ctx.state_sync.state_sync.get_snapshots([model1_snapshot.snapshot_id])
+    assert not state_snapshot
+
+    # Case 2: Assume that the view cleanup raises an error; the environment
+    # record should still exist
+    mocker.patch(
+        "sqlmesh.core.context.cleanup_expired_views", side_effect=Exception("view cleanup error")
+    )
+    ctx, model1_snapshot = setup_scenario()
+
+    views = ctx.fetchdf("FROM duckdb_views() SELECT * EXCLUDE(sql) WHERE NOT internal")
+    assert views.empty
+
+    # - Check that the environment record exists in the state sync
+    assert ctx.state_sync.get_environment("dev")
+
+    # - Run the janitor again; this time it should succeed
+    mocker.patch("sqlmesh.core.context.cleanup_expired_views")
+    ctx._run_janitor(ignore_ttl=True)
+
+    # - Check that the environment record does not exist in the state sync anymore
+    assert not ctx.state_sync.get_environment("dev")
+
+
+@use_terminal_console
+def test_destroy(copy_to_temp_path):
+    # Test project with two gateways to verify that cleanup is performed across engines
+    paths = copy_to_temp_path("tests/fixtures/multi_virtual_layer")
+    path = Path(paths[0])
+    first_db_path = str(path / "db_1.db")
+    second_db_path = str(path / "db_2.db")
+
+    config = Config(
+        gateways={
+            "first": GatewayConfig(
+                connection=DuckDBConnectionConfig(database=first_db_path),
+                variables={"overriden_var": "gateway_1"},
+            ),
+            "second": GatewayConfig(
+                connection=DuckDBConnectionConfig(database=second_db_path),
+                variables={"overriden_var": "gateway_2"},
+            ),
+        },
+        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
+        model_naming=NameInferenceConfig(infer_names=True),
+        default_gateway="first",
+        gateway_managed_virtual_layer=True,
+        variables={"overriden_var": "global", "global_one": 88},
+    )
+
+    context = Context(paths=paths, config=config)
+    plan = context.plan_builder().build()
+    assert len(plan.new_snapshots) == 4
+    context.apply(plan)
+
+    # Confirm the cache exists
+    cache_path = Path(path) / ".cache"
+    assert cache_path.exists()
+    assert len(list(cache_path.iterdir())) > 0
+
+    model = context.get_model("db_1.first_schema.model_one")
+
+    context.upsert_model(
+        model.copy(
+            update={
+                "query_": ParsableSql(
+                    sql=model.query.select("'c' AS extra").sql(dialect=model.dialect)
+                )
+            }
+        )
+    )
+    plan = context.plan_builder().build()
+    context.apply(plan)
+
+    state_environments = context.state_reader.get_environments()
+    state_snapshots = context.state_reader.get_snapshots(context.snapshots.values())
+
+    assert len(state_snapshots) == len(state_environments[0].snapshots)
+
+    # Create a dev environment with changed models
+    model = context.get_model("db_2.second_schema.model_one")
+    context.upsert_model(
+        model.copy(
+            update={
+                "query_": ParsableSql(
+                    sql=model.query.select("'d' AS extra").sql(dialect=model.dialect)
+                )
+            }
+        )
+    )
+    model = context.get_model("first_schema.model_two")
+    context.upsert_model(
+        model.copy(
+            update={
+                "query_": ParsableSql(
+                    sql=model.query.select("'d2' AS col").sql(dialect=model.dialect)
+                )
+            }
+        )
+    )
+    plan = context.plan_builder("dev").build()
+    context.apply(plan)
+
+    dev_environment = context.state_sync.get_environment("dev")
+    assert dev_environment is not None
+
+    state_environments = context.state_reader.get_environments()
+    state_snapshots = context.state_reader.get_snapshots(context.snapshots.values())
+    assert (
+        len(state_snapshots)
+        == len(state_environments[0].snapshots)
+        == len(state_environments[1].snapshots)
+    )
+
+    # The state tables should still be retrievable at this point
+    state_tables = {
+        "_environments",
+        "_snapshots",
+        "_intervals",
+        "_auto_restatements",
+        "_environment_statements",
+        "_versions",
+    }
+    for table_name in state_tables:
+        context.fetchdf(f"SELECT * FROM db_1.sqlmesh.{table_name}")
+
+    # The actual tables as well
+    context.engine_adapters["second"].fetchdf("SELECT * FROM db_2.second_schema.model_one")
+    context.engine_adapters["second"].fetchdf("SELECT * FROM db_2.second_schema.model_two")
+    context.fetchdf("SELECT * FROM db_1.first_schema.model_one")
+    context.fetchdf("SELECT * FROM db_1.first_schema.model_two")
+
+    # Use the destroy command to remove all data objects and state
+    # Mock the console confirmation to automatically return True
+    with patch.object(context.console, "_confirm", return_value=True):
+        context._destroy()
+
+    # Ensure all state tables have been removed
+    for table_name in state_tables:
+        with pytest.raises(
+            Exception, match=f"Catalog Error: Table with name {table_name} does not exist!"
+        ):
+            context.fetchdf(f"SELECT * FROM db_1.sqlmesh.{table_name}")
+
+    # Validate that the model tables have been deleted as well
+    with pytest.raises(
+        Exception, match=r"Catalog Error: Table with name model_two does not exist!"
+    ):
+        context.fetchdf("SELECT * FROM db_1.first_schema.model_two")
+    with pytest.raises(
+        Exception, match=r"Catalog Error: Table with name model_one does not exist!"
+    ):
+        context.fetchdf("SELECT * FROM db_1.first_schema.model_one")
+
+    with pytest.raises(
+        Exception, match=r"Catalog Error: Table with name model_two does not exist!"
+    ):
+        context.engine_adapters["second"].fetchdf("SELECT * FROM db_2.second_schema.model_two")
+    with pytest.raises(
+        Exception, match=r"Catalog Error: Table with name model_one does not exist!"
+ ): + context.engine_adapters["second"].fetchdf("SELECT * FROM db_2.second_schema.model_one") + + # Ensure the cache has been removed + assert not cache_path.exists() + + +@use_terminal_console +def test_render_path_instead_of_model(tmp_path: Path): + create_temp_file(tmp_path, Path("models/test.sql"), "MODEL (name test_model); SELECT 1 AS col") + ctx = Context(paths=tmp_path, config=Config()) + + # Case 1: Fail gracefully when the user is passing in a path instead of a model name + for test_model in ["models/test.sql", "models/test.py"]: + with pytest.raises( + SQLMeshError, + match="Resolving models by path is not supported, please pass in the model name instead.", + ): + ctx.render(test_model) + + # Case 2: Fail gracefully when the model name is not found + with pytest.raises(SQLMeshError, match="Cannot find model with name 'incorrect_model'"): + ctx.render("incorrect_model") + + # Case 3: Render the model successfully + assert ctx.render("test_model").sql() == 'SELECT 1 AS "col"' + + +def test_invalidating_environment(sushi_context: Context): + apply_to_environment(sushi_context, "dev") + start_environment = sushi_context.state_sync.get_environment("dev") + assert start_environment is not None + metadata = DuckDBMetadata.from_context(sushi_context) + start_schemas = set(metadata.schemas) + assert "sushi__dev" in start_schemas + sushi_context.invalidate_environment("dev") + invalidate_environment = sushi_context.state_sync.get_environment("dev") + assert invalidate_environment is not None + schemas_prior_to_janitor = set(metadata.schemas) + assert invalidate_environment.expiration_ts < start_environment.expiration_ts # type: ignore + assert start_schemas == schemas_prior_to_janitor + sushi_context._run_janitor() + schemas_after_janitor = set(metadata.schemas) + assert sushi_context.state_sync.get_environment("dev") is None + assert start_schemas - schemas_after_janitor == {"sushi__dev"} + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_evaluate_uncategorized_snapshot(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Add a new projection + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + # Downstream model references the new projection + downstream_model = context.get_model("sushi.top_waiters") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, downstream_model), literal=False)) + + df = context.evaluate( + "sushi.top_waiters", start="2023-01-05", end="2023-01-06", execution_time=now() + ) + assert set(df["one"].tolist()) == {1} diff --git a/tests/core/integration/test_change_scenarios.py b/tests/core/integration/test_change_scenarios.py new file mode 100644 index 0000000000..816f41afe6 --- /dev/null +++ b/tests/core/integration/test_change_scenarios.py @@ -0,0 +1,1484 @@ +from __future__ import annotations + +import typing as t +import json +from datetime import timedelta +from unittest import mock +import pandas as pd # noqa: TID253 +import pytest +from pathlib import Path +from sqlmesh.core.model.common import ParsableSql +import time_machine +from sqlglot.expressions import DataType +import re + +from sqlmesh.cli.project_init import init_example_project +from sqlmesh.core import constants as c +from sqlmesh.core import dialect as d +from sqlmesh.core.config import ( + AutoCategorizationMode, + Config, + GatewayConfig, + ModelDefaultsConfig, + DuckDBConnectionConfig, +) +from sqlmesh.core.context import 
Context +from sqlmesh.core.config.categorizer import CategorizerConfig +from sqlmesh.core.model import ( + FullKind, + ModelKind, + ModelKindName, + SqlModel, + PythonModel, + ViewKind, + load_sql_based_model, +) +from sqlmesh.core.model.kind import model_kind_type_from_name +from sqlmesh.core.plan import Plan, SnapshotIntervals +from sqlmesh.core.snapshot import ( + SnapshotChangeCategory, +) +from sqlmesh.utils.date import now, to_timestamp +from sqlmesh.utils.errors import ( + SQLMeshError, +) +from tests.core.integration.utils import ( + apply_to_environment, + add_projection_to_model, + initial_add, + change_data_type, + validate_apply_basics, + change_model_kind, + validate_model_kind_change, + validate_query_change, + validate_plan_changes, +) + +pytestmark = pytest.mark.slow + + +def test_auto_categorization(sushi_context: Context): + environment = "dev" + for config in sushi_context.configs.values(): + config.plan.auto_categorize_changes.sql = AutoCategorizationMode.FULL + initial_add(sushi_context, environment) + + version = sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ).version + fingerprint = sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ).fingerprint + + model = t.cast(SqlModel, sushi_context.get_model("sushi.customers", raise_if_missing=True)) + sushi_context.upsert_model( + "sushi.customers", + query_=ParsableSql(sql=model.query.select("'foo' AS foo").sql(dialect=model.dialect)), # type: ignore + ) + apply_to_environment(sushi_context, environment) + + assert ( + sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ).change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert ( + sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ).fingerprint + != fingerprint + ) + assert ( + sushi_context.get_snapshot("sushi.waiter_as_customer_by_day", raise_if_missing=True).version + == version + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_breaking_only_impacts_immediate_children(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + context.upsert_model(context.get_model("sushi.top_waiters").copy(update={"kind": FullKind()})) + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + + breaking_model = context.get_model("sushi.orders") + breaking_model = breaking_model.copy(update={"stamp": "force new version"}) + context.upsert_model(breaking_model) + breaking_snapshot = context.get_snapshot(breaking_model, raise_if_missing=True) + + non_breaking_model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, non_breaking_model))) + non_breaking_snapshot = context.get_snapshot(non_breaking_model, raise_if_missing=True) + top_waiter_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan_builder = context.plan_builder("dev", skip_tests=True, enable_preview=False) + plan_builder.set_choice(breaking_snapshot, SnapshotChangeCategory.BREAKING) + plan = plan_builder.build() + assert ( + plan.context_diff.snapshots[breaking_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert ( + plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].change_category + == 
SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert not any(i.snapshot_id == top_waiter_snapshot.snapshot_id for i in plan.missing_intervals) + + context.apply(plan) + assert ( + not context.plan_builder("dev", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + # Deploy everything to prod. + plan = context.plan_builder("prod", skip_tests=True).build() + assert not plan.missing_intervals + + context.apply(plan) + assert ( + not context.plan_builder("prod", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + +@pytest.mark.parametrize( + "context_fixture", + ["sushi_context", "sushi_dbt_context", "sushi_test_dbt_context", "sushi_no_default_catalog"], +) +def test_model_add(context_fixture: Context, request): + initial_add(request.getfixturevalue(context_fixture), "dev") + + +def test_model_removed(sushi_context: Context): + environment = "dev" + initial_add(sushi_context, environment) + + top_waiters_snapshot_id = sushi_context.get_snapshot( + "sushi.top_waiters", raise_if_missing=True + ).snapshot_id + + sushi_context._models.pop('"memory"."sushi"."top_waiters"') + + def _validate_plan(context, plan): + validate_plan_changes(plan, removed=[top_waiters_snapshot_id]) + assert not plan.missing_intervals + + def _validate_apply(context): + assert not sushi_context.get_snapshot("sushi.top_waiters", raise_if_missing=False) + assert sushi_context.state_reader.get_snapshots([top_waiters_snapshot_id]) + env = sushi_context.state_reader.get_environment(environment) + assert env + assert all(snapshot.name != '"memory"."sushi"."top_waiters"' for snapshot in env.snapshots) + + apply_to_environment( + sushi_context, + environment, + SnapshotChangeCategory.BREAKING, + plan_validators=[_validate_plan], + apply_validators=[_validate_apply], + ) + + +def test_non_breaking_change(sushi_context: Context): + environment = "dev" + initial_add(sushi_context, environment) + validate_query_change(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING, False) + + +def test_breaking_change(sushi_context: Context): + environment = "dev" + initial_add(sushi_context, environment) + validate_query_change(sushi_context, environment, SnapshotChangeCategory.BREAKING, False) + + +def test_logical_change(sushi_context: Context): + environment = "dev" + initial_add(sushi_context, environment) + previous_sushi_items_version = sushi_context.get_snapshot( + "sushi.items", raise_if_missing=True + ).version + + change_data_type( + sushi_context, + "sushi.items", + DataType.Type.DOUBLE, + DataType.Type.FLOAT, + ) + apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) + + change_data_type( + sushi_context, + "sushi.items", + DataType.Type.FLOAT, + DataType.Type.DOUBLE, + ) + apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) + + assert ( + sushi_context.get_snapshot("sushi.items", raise_if_missing=True).version + == previous_sushi_items_version + ) + + +@pytest.mark.parametrize( + "from_, to", + [ + (ModelKindName.INCREMENTAL_BY_TIME_RANGE, ModelKindName.FULL), + (ModelKindName.FULL, ModelKindName.INCREMENTAL_BY_TIME_RANGE), + ], +) +def test_model_kind_change(from_: ModelKindName, to: ModelKindName, sushi_context: Context): + environment = f"test_model_kind_change__{from_.value.lower()}__{to.value.lower()}" + incremental_snapshot = sushi_context.get_snapshot("sushi.items", raise_if_missing=True).copy() + + if from_ != 
ModelKindName.INCREMENTAL_BY_TIME_RANGE: + change_model_kind(sushi_context, from_) + apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) + + if to == ModelKindName.INCREMENTAL_BY_TIME_RANGE: + sushi_context.upsert_model(incremental_snapshot.model) + else: + change_model_kind(sushi_context, to) + + logical = to in (ModelKindName.INCREMENTAL_BY_TIME_RANGE, ModelKindName.EMBEDDED) + validate_model_kind_change(to, sushi_context, environment, logical=logical) + + +def test_environment_isolation(sushi_context: Context): + prod_snapshots = sushi_context.snapshots.values() + + change_data_type( + sushi_context, + "sushi.items", + DataType.Type.DOUBLE, + DataType.Type.FLOAT, + ) + directly_modified = ['"memory"."sushi"."items"'] + indirectly_modified = [ + '"memory"."sushi"."order_items"', + '"memory"."sushi"."waiter_revenue_by_day"', + '"memory"."sushi"."customer_revenue_by_day"', + '"memory"."sushi"."customer_revenue_lifetime"', + '"memory"."sushi"."top_waiters"', + "assert_item_price_above_zero", + ] + + apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) + + # Verify prod unchanged + validate_apply_basics(sushi_context, "prod", prod_snapshots) + + def _validate_plan(context, plan): + validate_plan_changes(plan, modified=directly_modified + indirectly_modified) + assert not plan.missing_intervals + + apply_to_environment( + sushi_context, + "prod", + SnapshotChangeCategory.BREAKING, + plan_validators=[_validate_plan], + ) + + +def test_environment_promotion(sushi_context: Context): + initial_add(sushi_context, "dev") + + # Simulate prod "ahead" + change_data_type(sushi_context, "sushi.items", DataType.Type.DOUBLE, DataType.Type.FLOAT) + apply_to_environment(sushi_context, "prod", SnapshotChangeCategory.BREAKING) + + # Simulate rebase + apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) + + # Make changes in dev + change_data_type(sushi_context, "sushi.items", DataType.Type.FLOAT, DataType.Type.DECIMAL) + apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.NON_BREAKING) + + change_data_type(sushi_context, "sushi.top_waiters", DataType.Type.DOUBLE, DataType.Type.INT) + apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) + + change_data_type( + sushi_context, + "sushi.customer_revenue_by_day", + DataType.Type.DOUBLE, + DataType.Type.FLOAT, + ) + apply_to_environment( + sushi_context, + "dev", + SnapshotChangeCategory.FORWARD_ONLY, + allow_destructive_models=['"memory"."sushi"."customer_revenue_by_day"'], + ) + + # Promote to prod + def _validate_plan(context, plan): + sushi_items_snapshot = context.get_snapshot("sushi.items", raise_if_missing=True) + sushi_top_waiters_snapshot = context.get_snapshot( + "sushi.top_waiters", raise_if_missing=True + ) + sushi_customer_revenue_by_day_snapshot = context.get_snapshot( + "sushi.customer_revenue_by_day", raise_if_missing=True + ) + + assert ( + plan.context_diff.modified_snapshots[sushi_items_snapshot.name][0].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.modified_snapshots[sushi_top_waiters_snapshot.name][0].change_category + == SnapshotChangeCategory.BREAKING + ) + assert ( + plan.context_diff.modified_snapshots[sushi_customer_revenue_by_day_snapshot.name][ + 0 + ].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert plan.context_diff.snapshots[ + sushi_customer_revenue_by_day_snapshot.snapshot_id + ].is_forward_only + + apply_to_environment( + sushi_context, + "prod", + 
SnapshotChangeCategory.NON_BREAKING, + plan_validators=[_validate_plan], + allow_destructive_models=['"memory"."sushi"."customer_revenue_by_day"'], + ) + + +def test_no_override(sushi_context: Context) -> None: + change_data_type( + sushi_context, + "sushi.items", + DataType.Type.INT, + DataType.Type.BIGINT, + ) + + change_data_type( + sushi_context, + "sushi.order_items", + DataType.Type.INT, + DataType.Type.BIGINT, + ) + + plan_builder = sushi_context.plan_builder("prod") + plan = plan_builder.build() + + sushi_items_snapshot = sushi_context.get_snapshot("sushi.items", raise_if_missing=True) + sushi_order_items_snapshot = sushi_context.get_snapshot( + "sushi.order_items", raise_if_missing=True + ) + sushi_water_revenue_by_day_snapshot = sushi_context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + + items = plan.context_diff.snapshots[sushi_items_snapshot.snapshot_id] + order_items = plan.context_diff.snapshots[sushi_order_items_snapshot.snapshot_id] + waiter_revenue = plan.context_diff.snapshots[sushi_water_revenue_by_day_snapshot.snapshot_id] + + plan_builder.set_choice(items, SnapshotChangeCategory.BREAKING).set_choice( + order_items, SnapshotChangeCategory.NON_BREAKING + ) + plan_builder.build() + assert items.is_new_version + assert waiter_revenue.is_new_version + plan_builder.set_choice(items, SnapshotChangeCategory.NON_BREAKING) + plan_builder.build() + assert not waiter_revenue.is_new_version + + +@pytest.mark.parametrize( + "change_categories, expected", + [ + ([SnapshotChangeCategory.NON_BREAKING], SnapshotChangeCategory.BREAKING), + ([SnapshotChangeCategory.BREAKING], SnapshotChangeCategory.BREAKING), + ( + [SnapshotChangeCategory.NON_BREAKING, SnapshotChangeCategory.NON_BREAKING], + SnapshotChangeCategory.BREAKING, + ), + ( + [SnapshotChangeCategory.NON_BREAKING, SnapshotChangeCategory.BREAKING], + SnapshotChangeCategory.BREAKING, + ), + ( + [SnapshotChangeCategory.BREAKING, SnapshotChangeCategory.NON_BREAKING], + SnapshotChangeCategory.BREAKING, + ), + ( + [SnapshotChangeCategory.BREAKING, SnapshotChangeCategory.BREAKING], + SnapshotChangeCategory.BREAKING, + ), + ], +) +def test_revert( + sushi_context: Context, + change_categories: t.List[SnapshotChangeCategory], + expected: SnapshotChangeCategory, +): + environment = "prod" + original_snapshot_id = sushi_context.get_snapshot("sushi.items", raise_if_missing=True) + + types = (DataType.Type.DOUBLE, DataType.Type.FLOAT, DataType.Type.DECIMAL) + assert len(change_categories) < len(types) + + for i, category in enumerate(change_categories): + change_data_type(sushi_context, "sushi.items", *types[i : i + 2]) + apply_to_environment(sushi_context, environment, category) + assert ( + sushi_context.get_snapshot("sushi.items", raise_if_missing=True) != original_snapshot_id + ) + + change_data_type(sushi_context, "sushi.items", types[len(change_categories)], types[0]) + + def _validate_plan(_, plan): + snapshot = next(s for s in plan.snapshots.values() if s.name == '"memory"."sushi"."items"') + assert snapshot.change_category == expected + assert not plan.missing_intervals + + apply_to_environment( + sushi_context, + environment, + change_categories[-1], + plan_validators=[_validate_plan], + ) + assert sushi_context.get_snapshot("sushi.items", raise_if_missing=True) == original_snapshot_id + + +def test_revert_after_downstream_change(sushi_context: Context): + environment = "prod" + change_data_type(sushi_context, "sushi.items", DataType.Type.DOUBLE, DataType.Type.FLOAT) + 
apply_to_environment(sushi_context, environment, SnapshotChangeCategory.BREAKING) + + change_data_type( + sushi_context, + "sushi.waiter_revenue_by_day", + DataType.Type.DOUBLE, + DataType.Type.FLOAT, + ) + apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) + + change_data_type(sushi_context, "sushi.items", DataType.Type.FLOAT, DataType.Type.DOUBLE) + + def _validate_plan(_, plan): + snapshot = next(s for s in plan.snapshots.values() if s.name == '"memory"."sushi"."items"') + assert snapshot.change_category == SnapshotChangeCategory.BREAKING + assert plan.missing_intervals + + apply_to_environment( + sushi_context, + environment, + SnapshotChangeCategory.BREAKING, + plan_validators=[_validate_plan], + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_indirect_non_breaking_change_after_forward_only_in_dev(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + # Make sure that the most downstream model is a materialized model. + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": FullKind()}) + context.upsert_model(model) + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + + # Make sushi.orders a forward-only model. + model = context.get_model("sushi.orders") + updated_model_kind = model.kind.copy(update={"forward_only": True}) + model = model.copy(update={"stamp": "force new version", "kind": updated_model_kind}) + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + + plan = context.plan_builder( + "dev", + skip_tests=True, + enable_preview=False, + categorizer_config=CategorizerConfig.all_full(), + ).build() + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only + assert not plan.requires_backfill + context.apply(plan) + + # Make a non-breaking change to a model. + model = context.get_model("sushi.top_waiters") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 1 + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Apply the non-breaking changes. + context.apply(plan) + + # Make a non-breaking change upstream from the previously modified model. 
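+    # Adding a projection is an additive change: the assertions below expect NON_BREAKING
+    # for the modified model and INDIRECT_NON_BREAKING for its child, with only the
+    # directly modified model requiring a backfill.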
+ model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + waiter_revenue_by_day_snapshot = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Apply the upstream non-breaking changes. + context.apply(plan) + assert not context.plan_builder("dev", skip_tests=True).build().requires_backfill + + # Deploy everything to prod. + plan = context.plan_builder("prod", skip_tests=True, enable_preview=False).build() + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + assert ( + not context.plan_builder("prod", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +@pytest.mark.parametrize("forward_only", [False, True]) +def test_plan_repairs_unrenderable_snapshot_state( + init_and_plan_context: t.Callable, forward_only: bool +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + target_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") + assert target_snapshot + + # Manually corrupt the snapshot's query + raw_snapshot = context.state_sync.state_sync.engine_adapter.fetchone( + f"SELECT snapshot FROM sqlmesh._snapshots WHERE name = '{target_snapshot.name}' AND identifier = 
'{target_snapshot.identifier}'" + )[0] # type: ignore + parsed_snapshot = json.loads(raw_snapshot) + parsed_snapshot["node"]["query"] = "SELECT @missing_macro()" + context.state_sync.state_sync.engine_adapter.update_table( + "sqlmesh._snapshots", + {"snapshot": json.dumps(parsed_snapshot)}, + f"name = '{target_snapshot.name}' AND identifier = '{target_snapshot.identifier}'", + ) + + context.clear_caches() + target_snapshot_in_state = context.state_sync.get_snapshots([target_snapshot.snapshot_id])[ + target_snapshot.snapshot_id + ] + + with pytest.raises(Exception): + target_snapshot_in_state.model.render_query_or_raise() + + # Repair the snapshot by creating a new version of it + context.upsert_model(target_snapshot.model.name, stamp="repair") + target_snapshot = context.get_snapshot(target_snapshot.name) + + plan_builder = context.plan_builder("prod", forward_only=forward_only) + plan = plan_builder.build() + if not forward_only: + assert target_snapshot.snapshot_id in {i.snapshot_id for i in plan.missing_intervals} + assert plan.directly_modified == {target_snapshot.snapshot_id} + plan_builder.set_choice(target_snapshot, SnapshotChangeCategory.NON_BREAKING) + plan = plan_builder.build() + + context.apply(plan) + + context.clear_caches() + assert context.get_snapshot(target_snapshot.name).model.render_query_or_raise() + target_snapshot_in_state = context.state_sync.get_snapshots([target_snapshot.snapshot_id])[ + target_snapshot.snapshot_id + ] + assert target_snapshot_in_state.model.render_query_or_raise() + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_no_backfill_for_model_downstream_of_metadata_change(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + # Make sushi.waiter_revenue_by_day a forward-only model. + forward_only_model = context.get_model("sushi.waiter_revenue_by_day") + updated_model_kind = forward_only_model.kind.copy(update={"forward_only": True}) + forward_only_model = forward_only_model.copy(update={"kind": updated_model_kind}) + context.upsert_model(forward_only_model) + + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + # Make a metadata change upstream of the forward-only model. 
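+    # An owner-only update is a metadata change: new snapshots are recorded below, but
+    # nothing is reported as directly or indirectly modified and no intervals are missing.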
+ context.upsert_model("sushi.orders", owner="new_owner") + + plan = context.plan_builder("test_dev").build() + assert plan.has_changes + assert not plan.directly_modified + assert not plan.indirectly_modified + assert not plan.missing_intervals + assert all( + snapshot.change_category == SnapshotChangeCategory.METADATA + for snapshot in plan.new_snapshots + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_set_choice_is_reflected_in_missing_intervals(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + context.upsert_model(context.get_model("sushi.top_waiters").copy(update={"kind": FullKind()})) + context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True) + + model_name = "sushi.waiter_revenue_by_day" + + model = context.get_model(model_name) + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + snapshot = context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan_builder = context.plan_builder("dev", skip_tests=True) + plan = plan_builder.build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Change the category to BREAKING + plan = plan_builder.set_choice( + plan.context_diff.snapshots[snapshot.snapshot_id], SnapshotChangeCategory.BREAKING + ).build() + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_BREAKING + ) + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Change the 
category back to NON_BREAKING + plan = plan_builder.set_choice( + plan.context_diff.snapshots[snapshot.snapshot_id], SnapshotChangeCategory.NON_BREAKING + ).build() + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert dev_df["event_date"].tolist() == [ + pd.to_datetime(x) + for x in [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-04", + "2023-01-05", + "2023-01-06", + "2023-01-07", + ] + ] + + # Promote changes to prod + prod_plan = context.plan_builder(skip_tests=True).build() + assert not prod_plan.missing_intervals + + context.apply(prod_plan) + prod_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" + ) + assert prod_df["event_date"].tolist() == [ + pd.to_datetime(x) + for x in [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-04", + "2023-01-05", + "2023-01-06", + "2023-01-07", + ] + ] + + +def test_plan_production_environment_statements(tmp_path: Path): + model_a = """ + MODEL ( + name test_schema.a, + kind FULL, + ); + + @IF( + @runtime_stage IN ('evaluating', 'creating'), + INSERT INTO schema_names_for_prod (physical_schema_name) VALUES (@resolve_template('@{schema_name}')) + ); + + SELECT 1 AS account_id + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + for path, defn in {"a.sql": model_a}.items(): + with open(models_dir / path, "w") as f: + f.write(defn) + + before_all = [ + "CREATE TABLE IF NOT EXISTS schema_names_for_@this_env (physical_schema_name VARCHAR)", + "@IF(@runtime_stage = 'before_all', CREATE TABLE IF NOT EXISTS should_create AS SELECT @runtime_stage)", + ] + after_all = [ + "@IF(@this_env = 'prod', CREATE TABLE IF NOT EXISTS after_t AS SELECT @var_5)", + "@IF(@runtime_stage = 'before_all', CREATE TABLE IF NOT EXISTS not_create AS SELECT @runtime_stage)", + ] + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + before_all=before_all, + after_all=after_all, + variables={"var_5": 5}, + ) + ctx = Context(paths=[tmp_path], config=config) + ctx.plan(auto_apply=True, no_prompts=True) + + before_t = ctx.fetchdf("select * from schema_names_for_prod").to_dict() + assert before_t["physical_schema_name"][0] == "sqlmesh__test_schema" + + after_t = ctx.fetchdf("select * from after_t").to_dict() + assert after_t["5"][0] == 5 + + environment_statements = ctx.state_reader.get_environment_statements(c.PROD) + assert environment_statements[0].before_all == before_all + assert environment_statements[0].after_all == after_all + assert environment_statements[0].python_env.keys() == {"__sqlmesh__vars__"} + assert 
environment_statements[0].python_env["__sqlmesh__vars__"].payload == "{'var_5': 5}" + + should_create = ctx.fetchdf("select * from should_create").to_dict() + assert should_create["before_all"][0] == "before_all" + + with pytest.raises( + Exception, match=r"Catalog Error: Table with name not_create does not exist!" + ): + ctx.fetchdf("select * from not_create") + + +def test_environment_statements_error_handling(tmp_path: Path): + model_a = """ + MODEL ( + name test_schema.a, + kind FULL, + ); + + SELECT 1 AS account_id + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + for path, defn in {"a.sql": model_a}.items(): + with open(models_dir / path, "w") as f: + f.write(defn) + + before_all = [ + "CREATE TABLE identical_table (physical_schema_name VARCHAR)", + "CREATE TABLE identical_table (physical_schema_name VARCHAR)", + ] + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + before_all=before_all, + ) + ctx = Context(paths=[tmp_path], config=config) + + expected_error_message = re.escape( + """An error occurred during execution of the following 'before_all' statement: + +CREATE TABLE identical_table (physical_schema_name TEXT) + +Catalog Error: Table with name "identical_table" already exists!""" + ) + + with pytest.raises(SQLMeshError, match=expected_error_message): + ctx.plan(auto_apply=True, no_prompts=True) + + after_all = [ + "@bad_macro()", + ] + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + after_all=after_all, + ) + ctx = Context(paths=[tmp_path], config=config) + + expected_error_message = re.escape( + """An error occurred during rendering of the 'after_all' statements: + +Failed to resolve macros for + +@bad_macro() + +Macro 'bad_macro' does not exist.""" + ) + + with pytest.raises(SQLMeshError, match=expected_error_message): + ctx.plan(auto_apply=True, no_prompts=True) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_full_model_change_with_plan_start_not_matching_model_start( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model = context.get_model("sushi.top_waiters") + context.upsert_model(model, kind=model_kind_type_from_name("FULL")()) # type: ignore + + # Apply the change with --skip-backfill first and no plan start + context.plan("dev", skip_tests=True, skip_backfill=True, no_prompts=True, auto_apply=True) + + # Apply the plan again but this time don't skip backfill and set start + # to be later than the model start + context.plan("dev", skip_tests=True, no_prompts=True, auto_apply=True, start="1 day ago") + + # Check that the number of rows is not 0 + row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi__dev.top_waiters")[0] + assert row_num > 0 + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_hourly_model_with_lookback_no_backfill_in_dev(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + + model_name = "sushi.waiter_revenue_by_day" + + model = context.get_model(model_name) + model = SqlModel.parse_obj( + { + **model.dict(), + "kind": model.kind.copy(update={"lookback": 1}), + "cron": "@hourly", + "audits": [], + } + ) + context.upsert_model(model) + + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + top_waiters_model = context.get_model("sushi.top_waiters") + top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=True) + context.upsert_model(top_waiters_model) + + 
context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + with time_machine.travel(now() + timedelta(hours=2)): + plan = context.plan_builder("dev", skip_tests=True).build() + # Make sure the waiter_revenue_by_day model is not backfilled. + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_max_interval_end_per_model_not_applied_when_end_is_provided( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + with time_machine.travel("2023-01-09 00:00:00 UTC"): + context.run() + + plan = context.plan_builder( + restate_models=["*"], start="2023-01-09", end="2023-01-09" + ).build() + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_against_expired_environment(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + modified_models = {model.fqn, context.get_model("sushi.top_waiters").fqn} + + plan = context.plan_builder("dev").build() + assert plan.has_changes + assert set(plan.context_diff.modified_snapshots) == modified_models + assert plan.missing_intervals + context.apply(plan) + + # Make sure there are no changes when comparing against the existing environment. + plan = context.plan_builder("dev").build() + assert not plan.has_changes + assert not plan.context_diff.modified_snapshots + assert not plan.missing_intervals + + # Invalidate the environment and make sure that the plan detects the changes. 
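+    # Invalidation schedules the environment for cleanup, so the subsequent plan has to
+    # re-apply the same changes; the underlying data is intact, hence no missing intervals.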
+ context.invalidate_environment("dev") + plan = context.plan_builder("dev").build() + assert plan.has_changes + assert set(plan.context_diff.modified_snapshots) == modified_models + assert not plan.missing_intervals + context.apply(plan) + + +def test_plan_environment_statements_doesnt_cause_extra_diff(tmp_path: Path): + model_a = """ + MODEL ( + name test_schema.a, + kind FULL, + ); + + SELECT 1; + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + (models_dir / "a.sql").write_text(model_a) + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + before_all=["select 1 as before_all"], + after_all=["select 2 as after_all"], + ) + ctx = Context(paths=[tmp_path], config=config) + + # first plan - should apply changes + assert ctx.plan(auto_apply=True, no_prompts=True).has_changes + + # second plan - nothing has changed so should report no changes + assert not ctx.plan(auto_apply=True, no_prompts=True).has_changes + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_snapshot_table_exists_for_promoted_snapshot(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + context.plan("dev", auto_apply=True, no_prompts=True, skip_tests=True) + + # Drop the views and make sure SQLMesh recreates them later + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + context.engine_adapter.drop_view(top_waiters_snapshot.table_name()) + context.engine_adapter.drop_view(top_waiters_snapshot.table_name(False)) + + # Make the environment unfinalized to force recreation of all views in the virtual layer + context.state_sync.state_sync.engine_adapter.execute( + "UPDATE sqlmesh._environments SET finalized_ts = NULL WHERE name = 'dev'" + ) + + context.plan( + "prod", + restate_models=["sushi.top_waiters"], + auto_apply=True, + no_prompts=True, + skip_tests=True, + ) + assert context.engine_adapter.table_exists(top_waiters_snapshot.table_name()) + + +def test_plan_twice_with_star_macro_yields_no_diff(tmp_path: Path): + init_example_project(tmp_path, engine_type="duckdb") + + star_model_definition = """ + MODEL ( + name sqlmesh_example.star_model, + kind FULL + ); + + SELECT @STAR(sqlmesh_example.full_model) FROM sqlmesh_example.full_model + """ + + star_model_path = tmp_path / "models" / "star_model.sql" + star_model_path.write_text(star_model_definition) + + db_path = str(tmp_path / "db.db") + config = Config( + gateways={"main": GatewayConfig(connection=DuckDBConnectionConfig(database=db_path))}, + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + ) + context = Context(paths=tmp_path, config=config) + context.plan(auto_apply=True, no_prompts=True) + + # Instantiate new context to remove caches etc + new_context = Context(paths=tmp_path, config=config) + + star_model = new_context.get_model("sqlmesh_example.star_model") + assert ( + star_model.render_query_or_raise().sql() + == 'SELECT CAST("full_model"."item_id" AS INT) AS "item_id", CAST("full_model"."num_orders" AS BIGINT) AS "num_orders" FROM "db"."sqlmesh_example"."full_model" AS "full_model"' + ) + + new_plan = new_context.plan_builder().build() + assert not new_plan.has_changes + assert not new_plan.new_snapshots + + +class OldPythonModel(PythonModel): + kind: ModelKind = ViewKind() + + +def test_python_model_default_kind_change(init_and_plan_context: t.Callable): + """ + 
Around 2024-07-17, the default kind for Python models was changed from VIEW to FULL
+    in order to avoid some edge cases where the views might not get updated in certain
+    situations.
+
+    This test ensures that if a user had a Python `kind: VIEW` model stored in state,
+    it can still be loaded without error and simply shows up as a breaking change from
+    `kind: VIEW` to `kind: FULL`.
+    """
+
+    # note: we deliberately don't specify a Kind here to allow the defaults to be picked up
+    python_model_file = """import typing as t
+import pandas as pd  # noqa: TID253
+from sqlmesh import ExecutionContext, model
+
+@model(
+    "sushi.python_view_model",
+    columns={
+        "id": "int",
+    }
+)
+def execute(
+    context: ExecutionContext,
+    **kwargs: t.Any,
+) -> pd.DataFrame:
+    return pd.DataFrame([
+        {"id": 1}
+    ])
+"""
+
+    context: Context
+    context, _ = init_and_plan_context("examples/sushi")
+
+    with open(context.path / "models" / "python_view_model.py", mode="w", encoding="utf8") as f:
+        f.write(python_model_file)
+
+    # monkey-patch PythonModel to default to kind: View again
+    # and ViewKind to allow Python models again
+    with (
+        mock.patch.object(ViewKind, "supports_python_models", return_value=True),
+        mock.patch("sqlmesh.core.model.definition.PythonModel", OldPythonModel),
+    ):
+        context.load()
+
+    # check that the monkey-patching worked
+    model = context.get_model("sushi.python_view_model")
+    assert model.kind.name == ModelKindName.VIEW
+    assert model.source_type == "python"
+
+    # apply plan
+    plan: Plan = context.plan(auto_apply=True)
+
+    # check that run() still works even though we have a Python model with kind: View in the state
+    snapshot_ids = [s for s in plan.directly_modified if "python_view_model" in s.name]
+    snapshot_from_state = list(context.state_sync.get_snapshots(snapshot_ids).values())[0]
+    assert snapshot_from_state.model.kind.name == ModelKindName.VIEW
+    assert snapshot_from_state.model.source_type == "python"
+    context.run()
+
+    # reload the context to load the model with the new defaults
+    # this also shows the earlier monkey-patching is no longer in effect
+    context.load()
+    model = context.get_model("sushi.python_view_model")
+    assert model.kind.name == ModelKindName.FULL
+    assert model.source_type == "python"
+
+    plan = context.plan(
+        categorizer_config=CategorizerConfig.all_full()
+    )  # the default categorizer_config doesn't auto-categorize Python models
+
+    assert plan.has_changes
+    assert not plan.indirectly_modified
+
+    assert len(plan.directly_modified) == 1
+    snapshot_id = list(plan.directly_modified)[0]
+    assert snapshot_id.name == '"memory"."sushi"."python_view_model"'
+    assert plan.modified_snapshots[snapshot_id].change_category == SnapshotChangeCategory.BREAKING
+
+    context.apply(plan)
+
+    df = context.engine_adapter.fetchdf("SELECT id FROM sushi.python_view_model")
+    assert df["id"].to_list() == [1]
+
+
+@time_machine.travel("2023-01-08 15:00:00 UTC")
+@pytest.mark.parametrize(
+    "parent_a_category,parent_b_category,expected_child_category",
+    [
+        (
+            SnapshotChangeCategory.BREAKING,
+            SnapshotChangeCategory.BREAKING,
+            SnapshotChangeCategory.INDIRECT_BREAKING,
+        ),
+        (
+            SnapshotChangeCategory.NON_BREAKING,
+            SnapshotChangeCategory.NON_BREAKING,
+            SnapshotChangeCategory.INDIRECT_NON_BREAKING,
+        ),
+        (
+            SnapshotChangeCategory.BREAKING,
+            SnapshotChangeCategory.NON_BREAKING,
+            SnapshotChangeCategory.INDIRECT_NON_BREAKING,
+        ),
+        (
+            SnapshotChangeCategory.NON_BREAKING,
+            SnapshotChangeCategory.BREAKING,
+            SnapshotChangeCategory.INDIRECT_BREAKING,
+        ),
+        (
+            SnapshotChangeCategory.NON_BREAKING,
SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.METADATA, + ), + ( + SnapshotChangeCategory.BREAKING, + SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.METADATA, + ), + ( + SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.BREAKING, + SnapshotChangeCategory.INDIRECT_BREAKING, + ), + ( + SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.NON_BREAKING, + SnapshotChangeCategory.INDIRECT_NON_BREAKING, + ), + ( + SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.METADATA, + ), + ], +) +def test_rebase_two_changed_parents( + init_and_plan_context: t.Callable, + parent_a_category: SnapshotChangeCategory, # This change is deployed to prod first + parent_b_category: SnapshotChangeCategory, # This change is deployed to prod second + expected_child_category: SnapshotChangeCategory, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + initial_model_a = context.get_model("sushi.orders") + initial_model_b = context.get_model("sushi.items") + + # Make change A and deploy it to dev_a + context.upsert_model(initial_model_a.name, stamp="1") + plan_builder = context.plan_builder("dev_a", skip_tests=True) + plan_builder.set_choice(context.get_snapshot(initial_model_a.name), parent_a_category) + context.apply(plan_builder.build()) + + # Make change B and deploy it to dev_b + context.upsert_model(initial_model_a) + context.upsert_model(initial_model_b.name, stamp="1") + plan_builder = context.plan_builder("dev_b", skip_tests=True) + plan_builder.set_choice(context.get_snapshot(initial_model_b.name), parent_b_category) + context.apply(plan_builder.build()) + + # Deploy change A to prod + context.upsert_model(initial_model_a.name, stamp="1") + context.upsert_model(initial_model_b) + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + # Apply change B in addition to A and plan against prod + context.upsert_model(initial_model_b.name, stamp="1") + plan = context.plan_builder("prod", skip_tests=True).build() + + # Validate the category of child snapshots + direct_child_snapshot = plan.snapshots[context.get_snapshot("sushi.order_items").snapshot_id] + assert direct_child_snapshot.change_category == expected_child_category + + indirect_child_snapshot = plan.snapshots[context.get_snapshot("sushi.top_waiters").snapshot_id] + assert indirect_child_snapshot.change_category == expected_child_category + + +@pytest.mark.parametrize( + "context_fixture", + ["sushi_context", "sushi_no_default_catalog"], +) +def test_unaligned_start_snapshots(context_fixture: Context, request): + context = request.getfixturevalue(context_fixture) + environment = "dev" + apply_to_environment(context, environment) + # Make breaking change to model upstream of a depends_on_self model + context.upsert_model("sushi.order_items", stamp="1") + # Apply the change starting at a date later then the beginning of the downstream depends_on_self model + plan = apply_to_environment( + context, + environment, + choice=SnapshotChangeCategory.BREAKING, + plan_start="2 days ago", + enable_preview=True, + ) + revenue_lifetime_snapshot = context.get_snapshot( + "sushi.customer_revenue_lifetime", raise_if_missing=True + ) + # Validate that the depends_on_self model is non-deployable + assert not plan.deployability_index.is_deployable(revenue_lifetime_snapshot) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_unaligned_start_snapshot_with_non_deployable_downstream(init_and_plan_context: t.Callable): + context, _ = 
init_and_plan_context("examples/sushi") + + downstream_model_name = "memory.sushi.customer_max_revenue" + + expressions = d.parse( + f""" + MODEL ( + name {downstream_model_name}, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key customer_id, + forward_only true, + ), + ); + + SELECT + customer_id, MAX(revenue) AS max_revenue + FROM memory.sushi.customer_revenue_lifetime + GROUP BY 1; + """ + ) + + downstream_model = load_sql_based_model(expressions) + assert downstream_model.forward_only + context.upsert_model(downstream_model) + + context.plan(auto_apply=True, no_prompts=True) + + customer_revenue_lifetime_model = context.get_model("sushi.customer_revenue_lifetime") + kwargs = { + **customer_revenue_lifetime_model.dict(), + "name": "memory.sushi.customer_revenue_lifetime_new", + "kind": dict( + name="INCREMENTAL_UNMANAGED" + ), # Make it incremental unmanaged to ensure the depends_on_past behavior. + } + context.upsert_model(SqlModel.parse_obj(kwargs)) + context.upsert_model( + downstream_model_name, + query_=ParsableSql( + sql="SELECT customer_id, MAX(revenue) AS max_revenue FROM memory.sushi.customer_revenue_lifetime_new GROUP BY 1" + ), + ) + + plan = context.plan_builder("dev", enable_preview=True).build() + assert {s.name for s in plan.new_snapshots} == { + '"memory"."sushi"."customer_revenue_lifetime_new"', + '"memory"."sushi"."customer_max_revenue"', + } + for snapshot_interval in plan.missing_intervals: + assert not plan.deployability_index.is_deployable(snapshot_interval.snapshot_id) + assert snapshot_interval.intervals[0][0] == to_timestamp("2023-01-07") + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_indirect_non_breaking_view_is_updated_with_new_table_references( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Add a new projection to the base model + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + # Run the janitor to delete the old snapshot record + context.run_janitor(ignore_ttl=True) + + # Check the downstream view and make sure it's still queryable + assert context.get_model("sushi.top_waiters").kind.is_view + row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi.top_waiters")[0] + assert row_num > 0 + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_annotated_self_referential_model(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + # Projections are fully annotated in the query but columns were not specified explicitly + expressions = d.parse( + f""" + MODEL ( + name memory.sushi.test_self_ref, + kind FULL, + start '2023-01-01', + ); + + SELECT 1::INT AS one FROM memory.sushi.test_self_ref; + """ + ) + model = load_sql_based_model(expressions) + assert model.depends_on_self + context.upsert_model(model) + + context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True) + + df = context.fetchdf("SELECT one FROM memory.sushi.test_self_ref") + assert len(df) == 0 diff --git a/tests/core/integration/test_config.py b/tests/core/integration/test_config.py new file mode 100644 index 0000000000..5d571cd7c5 --- /dev/null +++ b/tests/core/integration/test_config.py @@ -0,0 +1,580 @@ +from __future__ import annotations + +import typing as t +from unittest.mock import patch +import logging +import pytest +from pytest import MonkeyPatch +from pathlib 
import Path +from pytest_mock.plugin import MockerFixture +from sqlglot import exp +from IPython.utils.capture import capture_output + +from sqlmesh.core.config import ( + Config, + GatewayConfig, + ModelDefaultsConfig, + DuckDBConnectionConfig, + TableNamingConvention, + AutoCategorizationMode, +) +from sqlmesh.core.config.common import EnvironmentSuffixTarget +from sqlmesh.core.context import Context +from sqlmesh.core.config.plan import PlanConfig +from sqlmesh.core.engine_adapter import DuckDBEngineAdapter +from sqlmesh.core.model import SqlModel +from sqlmesh.core.model.common import ParsableSql +from sqlmesh.core.snapshot import ( + SnapshotChangeCategory, +) +from sqlmesh.utils.errors import ( + ConfigError, +) +from tests.conftest import DuckDBMetadata +from tests.utils.test_helpers import use_terminal_console +from tests.utils.test_filesystem import create_temp_file +from tests.core.integration.utils import apply_to_environment, initial_add + +pytestmark = pytest.mark.slow + + +@pytest.mark.set_default_connection(disable=True) +def test_missing_connection_config(): + # This is testing the actual implementation of Config.get_connection + # To make writing tests easier, it's patched by the autouse fixture provide_sqlmesh_default_connection + # Case 1: No default_connection or gateways specified should raise a ConfigError + with pytest.raises(ConfigError): + ctx = Context(config=Config()) + + # Case 2: No connection specified in the gateway should raise a ConfigError + with pytest.raises(ConfigError): + ctx = Context(config=Config(gateways={"incorrect": GatewayConfig()})) + + # Case 3: Specifying a default_connection or connection in the gateway should work + ctx = Context(config=Config(default_connection=DuckDBConnectionConfig())) + ctx = Context( + config=Config(gateways={"default": GatewayConfig(connection=DuckDBConnectionConfig())}) + ) + + +def test_physical_table_naming_strategy_table_only(copy_to_temp_path: t.Callable): + sushi_context = Context( + paths=copy_to_temp_path("examples/sushi"), + config="table_only_naming_config", + ) + + assert sushi_context.config.physical_table_naming_convention == TableNamingConvention.TABLE_ONLY + sushi_context.plan(auto_apply=True) + + adapter = sushi_context.engine_adapter + + snapshot_tables = [ + dict(catalog=str(r[0]), schema=str(r[1]), table=str(r[2])) + for r in adapter.fetchall( + "select table_catalog, table_schema, table_name from information_schema.tables where table_type='BASE TABLE'" + ) + ] + + assert all([not t["table"].startswith("sushi") for t in snapshot_tables]) + + prod_env = sushi_context.state_reader.get_environment("prod") + assert prod_env + + prod_env_snapshots = sushi_context.state_reader.get_snapshots(prod_env.snapshots) + + assert all( + s.table_naming_convention == TableNamingConvention.TABLE_ONLY + for s in prod_env_snapshots.values() + ) + + +def test_physical_table_naming_strategy_hash_md5(copy_to_temp_path: t.Callable): + sushi_context = Context( + paths=copy_to_temp_path("examples/sushi"), + config="hash_md5_naming_config", + ) + + assert sushi_context.config.physical_table_naming_convention == TableNamingConvention.HASH_MD5 + sushi_context.plan(auto_apply=True) + + adapter = sushi_context.engine_adapter + + snapshot_tables = [ + dict(catalog=str(r[0]), schema=str(r[1]), table=str(r[2])) + for r in adapter.fetchall( + "select table_catalog, table_schema, table_name from information_schema.tables where table_type='BASE TABLE'" + ) + ] + + assert all([not t["table"].startswith("sushi") for t in 
snapshot_tables]) + assert all([t["table"].startswith("sqlmesh_md5") for t in snapshot_tables]) + + prod_env = sushi_context.state_reader.get_environment("prod") + assert prod_env + + prod_env_snapshots = sushi_context.state_reader.get_snapshots(prod_env.snapshots) + + assert all( + s.table_naming_convention == TableNamingConvention.HASH_MD5 + for s in prod_env_snapshots.values() + ) + + +def test_environment_suffix_target_table(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context( + "examples/sushi", config="environment_suffix_table_config" + ) + context.apply(plan) + metadata = DuckDBMetadata.from_context(context) + environments_schemas = {"sushi"} + internal_schemas = {"sqlmesh", "sqlmesh__sushi"} + starting_schemas = environments_schemas | internal_schemas + # Make sure no new schemas are created + assert set(metadata.schemas) - starting_schemas == {"raw"} + prod_views = {x for x in metadata.qualified_views if x.db in environments_schemas} + # Make sure that all models are present + assert len(prod_views) == 16 + apply_to_environment(context, "dev") + # Make sure no new schemas are created + assert set(metadata.schemas) - starting_schemas == {"raw"} + dev_views = { + x for x in metadata.qualified_views if x.db in environments_schemas and "__dev" in x.name + } + # Make sure that there is a view with `__dev` for each view that exists in prod + assert len(dev_views) == len(prod_views) + assert {x.name.replace("__dev", "") for x in dev_views} - {x.name for x in prod_views} == set() + context.invalidate_environment("dev") + context._run_janitor() + views_after_janitor = metadata.qualified_views + # Make sure that the number of views after the janitor is the same as when you subtract away dev views + assert len(views_after_janitor) == len( + {x.sql(dialect="duckdb") for x in views_after_janitor} + - {x.sql(dialect="duckdb") for x in dev_views} + ) + # Double check there are no dev views + assert len({x for x in views_after_janitor if "__dev" in x.name}) == 0 + # Make sure prod views were not removed + assert {x.sql(dialect="duckdb") for x in prod_views} - { + x.sql(dialect="duckdb") for x in views_after_janitor + } == set() + + +def test_environment_suffix_target_catalog(tmp_path: Path, monkeypatch: MonkeyPatch) -> None: + monkeypatch.chdir(tmp_path) + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(catalogs={"main_warehouse": ":memory:"}), + environment_suffix_target=EnvironmentSuffixTarget.CATALOG, + ) + + assert config.default_connection + + models_dir = tmp_path / "models" + models_dir.mkdir() + + (models_dir / "model.sql").write_text(""" + MODEL ( + name example_schema.test_model, + kind FULL + ); + + SELECT '1' as a""") + + (models_dir / "fqn_model.sql").write_text(""" + MODEL ( + name memory.example_fqn_schema.test_model_fqn, + kind FULL + ); + + SELECT '1' as a""") + + ctx = Context(config=config, paths=tmp_path) + + metadata = DuckDBMetadata.from_context(ctx) + assert ctx.default_catalog == "main_warehouse" + assert metadata.catalogs == {"main_warehouse", "memory"} + + ctx.plan(auto_apply=True) + + # prod should go to the default catalog and not be overridden to a catalog called 'prod' + assert ( + ctx.engine_adapter.fetchone("select * from main_warehouse.example_schema.test_model")[0] # type: ignore + == "1" + ) + assert ( + ctx.engine_adapter.fetchone("select * from memory.example_fqn_schema.test_model_fqn")[0] # type: ignore + == "1" + ) + assert metadata.catalogs == {"main_warehouse", 
"memory"} + assert metadata.schemas_in_catalog("main_warehouse") == [ + "example_schema", + "sqlmesh__example_schema", + ] + assert metadata.schemas_in_catalog("memory") == [ + "example_fqn_schema", + "sqlmesh__example_fqn_schema", + ] + + # dev should be overridden to go to a catalogs called 'main_warehouse__dev' and 'memory__dev' + ctx.plan(environment="dev", include_unmodified=True, auto_apply=True) + assert ( + ctx.engine_adapter.fetchone("select * from main_warehouse__dev.example_schema.test_model")[ + 0 + ] # type: ignore + == "1" + ) + assert ( + ctx.engine_adapter.fetchone("select * from memory__dev.example_fqn_schema.test_model_fqn")[ + 0 + ] # type: ignore + == "1" + ) + assert metadata.catalogs == {"main_warehouse", "main_warehouse__dev", "memory", "memory__dev"} + + # schemas in dev envs should match prod and not have a suffix + assert metadata.schemas_in_catalog("main_warehouse") == [ + "example_schema", + "sqlmesh__example_schema", + ] + assert metadata.schemas_in_catalog("main_warehouse__dev") == ["example_schema"] + assert metadata.schemas_in_catalog("memory") == [ + "example_fqn_schema", + "sqlmesh__example_fqn_schema", + ] + assert metadata.schemas_in_catalog("memory__dev") == ["example_fqn_schema"] + + ctx.invalidate_environment("dev", sync=True) + + # dev catalogs cleaned up + assert metadata.catalogs == {"main_warehouse", "memory"} + + # prod catalogs still contain physical layer and views still work + assert metadata.schemas_in_catalog("main_warehouse") == [ + "example_schema", + "sqlmesh__example_schema", + ] + assert metadata.schemas_in_catalog("memory") == [ + "example_fqn_schema", + "sqlmesh__example_fqn_schema", + ] + + assert ( + ctx.engine_adapter.fetchone("select * from main_warehouse.example_schema.test_model")[0] # type: ignore + == "1" + ) + assert ( + ctx.engine_adapter.fetchone("select * from memory.example_fqn_schema.test_model_fqn")[0] # type: ignore + == "1" + ) + + +def test_environment_catalog_mapping(init_and_plan_context: t.Callable): + environments_schemas = {"raw", "sushi"} + + def get_prod_dev_views(metadata: DuckDBMetadata) -> t.Tuple[t.Set[exp.Table], t.Set[exp.Table]]: + views = metadata.qualified_views + prod_views = { + x for x in views if x.catalog == "prod_catalog" if x.db in environments_schemas + } + dev_views = {x for x in views if x.catalog == "dev_catalog" if x.db in environments_schemas} + return prod_views, dev_views + + def get_default_catalog_and_non_tables( + metadata: DuckDBMetadata, default_catalog: t.Optional[str] + ) -> t.Tuple[t.Set[exp.Table], t.Set[exp.Table]]: + tables = metadata.qualified_tables + user_default_tables = { + x for x in tables if x.catalog == default_catalog and x.db != "sqlmesh" + } + non_default_tables = {x for x in tables if x.catalog != default_catalog} + return user_default_tables, non_default_tables + + context, plan = init_and_plan_context( + "examples/sushi", config="environment_catalog_mapping_config" + ) + context.apply(plan) + metadata = DuckDBMetadata(context.engine_adapter) + state_metadata = DuckDBMetadata.from_context(context.state_sync.state_sync) + prod_views, dev_views = get_prod_dev_views(metadata) + ( + user_default_tables, + non_default_tables, + ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) + assert len(prod_views) == 16 + assert len(dev_views) == 0 + assert len(user_default_tables) == 15 + assert state_metadata.schemas == ["sqlmesh"] + assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( + { + "physical.sqlmesh._environments", + 
"physical.sqlmesh._intervals", + "physical.sqlmesh._snapshots", + "physical.sqlmesh._versions", + } + ) + apply_to_environment(context, "dev") + prod_views, dev_views = get_prod_dev_views(metadata) + ( + user_default_tables, + non_default_tables, + ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) + assert len(prod_views) == 16 + assert len(dev_views) == 16 + assert len(user_default_tables) == 16 + assert len(non_default_tables) == 0 + assert state_metadata.schemas == ["sqlmesh"] + assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( + { + "physical.sqlmesh._environments", + "physical.sqlmesh._intervals", + "physical.sqlmesh._snapshots", + "physical.sqlmesh._versions", + } + ) + apply_to_environment(context, "prodnot") + prod_views, dev_views = get_prod_dev_views(metadata) + ( + user_default_tables, + non_default_tables, + ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) + assert len(prod_views) == 16 + assert len(dev_views) == 32 + assert len(user_default_tables) == 16 + assert len(non_default_tables) == 0 + assert state_metadata.schemas == ["sqlmesh"] + assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( + { + "physical.sqlmesh._environments", + "physical.sqlmesh._intervals", + "physical.sqlmesh._snapshots", + "physical.sqlmesh._versions", + } + ) + context.invalidate_environment("dev") + context._run_janitor() + prod_views, dev_views = get_prod_dev_views(metadata) + ( + user_default_tables, + non_default_tables, + ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) + assert len(prod_views) == 16 + assert len(dev_views) == 16 + assert len(user_default_tables) == 16 + assert len(non_default_tables) == 0 + assert state_metadata.schemas == ["sqlmesh"] + assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( + { + "physical.sqlmesh._environments", + "physical.sqlmesh._intervals", + "physical.sqlmesh._snapshots", + "physical.sqlmesh._versions", + } + ) + + +@use_terminal_console +def test_plan_always_recreate_environment(tmp_path: Path): + def plan_with_output(ctx: Context, environment: str): + with patch.object(logger, "info") as mock_logger: + with capture_output() as output: + ctx.load() + ctx.plan(environment, no_prompts=True, auto_apply=True) + + # Facade logs info "Promoting environment {environment}" + assert mock_logger.call_args[0][1] == environment + + return output + + models_dir = tmp_path / "models" + + logger = logging.getLogger("sqlmesh.core.state_sync.db.facade") + + create_temp_file( + tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 1 AS col" + ) + + config = Config(plan=PlanConfig(always_recreate_environment=True)) + ctx = Context(paths=[tmp_path], config=config) + + # Case 1: Neither prod nor dev exists, so dev is initialized + output = plan_with_output(ctx, "dev") + + assert """`dev` environment will be initialized""" in output.stdout + + # Case 2: Prod does not exist, so dev is updated + create_temp_file( + tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 5 AS col" + ) + + output = plan_with_output(ctx, "dev") + assert "`dev` environment will be initialized" in output.stdout + + # Case 3: Prod is initialized, so plan comparisons moving forward should be against prod + output = plan_with_output(ctx, "prod") + assert "`prod` environment will be initialized" in output.stdout + + # Case 4: Dev is updated with a breaking change. 
Prod exists now so plan comparisons moving forward should be against prod
+    create_temp_file(
+        tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 10 AS col"
+    )
+    ctx.load()
+
+    plan = ctx.plan_builder("dev").build()
+
+    assert (
+        next(iter(plan.context_diff.snapshots.values())).change_category
+        == SnapshotChangeCategory.BREAKING
+    )
+
+    output = plan_with_output(ctx, "dev")
+    assert "New environment `dev` will be created from `prod`" in output.stdout
+    assert "Differences from the `prod` environment" in output.stdout
+
+    # Case 5: Dev is updated with a metadata change, but comparison against prod shows both the previous and the current changes
+    # so it's still classified as a breaking change
+    create_temp_file(
+        tmp_path,
+        models_dir / "a.sql",
+        "MODEL (name test.a, kind FULL, owner 'test'); SELECT 10 AS col",
+    )
+    ctx.load()
+
+    plan = ctx.plan_builder("dev").build()
+
+    assert (
+        next(iter(plan.context_diff.snapshots.values())).change_category
+        == SnapshotChangeCategory.BREAKING
+    )
+
+    output = plan_with_output(ctx, "dev")
+    assert "New environment `dev` will be created from `prod`" in output.stdout
+    assert "Differences from the `prod` environment" in output.stdout
+
+    stdout_rstrip = "\n".join([line.rstrip() for line in output.stdout.split("\n")])
+    assert (
+        """MODEL (
+  name test.a,
++ owner test,
+  kind FULL
+)
+SELECT
+- 5 AS col
++ 10 AS col"""
+        in stdout_rstrip
+    )
+
+    # Case 6: Ensure that target environment and create_from environment are not the same
+    output = plan_with_output(ctx, "prod")
+    assert "New environment `prod` will be created from `prod`" not in output.stdout
+
+    # Case 7: Check that we can still run Context::diff() against any environment
+    for environment in ["dev", "prod"]:
+        context_diff = ctx._context_diff(environment)
+        assert context_diff.environment == environment
+
+
+def test_before_all_after_all_execution_order(tmp_path: Path, mocker: MockerFixture):
+    model = """
+    MODEL (
+        name test_schema.model_that_depends_on_before_all,
+        kind FULL,
+    );
+
+    SELECT id, value FROM before_all_created_table
+    """
+
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+
+    with open(models_dir / "model.sql", "w") as f:
+        f.write(model)
+
+    # before_all statement that creates a table that the above model depends on
+    before_all_statement = (
+        "CREATE TABLE IF NOT EXISTS before_all_created_table AS SELECT 1 AS id, 'test' AS value"
+    )
+
+    # after_all that depends on the model
+    after_all_statement = "CREATE TABLE IF NOT EXISTS after_all_created_table AS SELECT id, value FROM test_schema.model_that_depends_on_before_all"
+
+    config = Config(
+        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
+        before_all=[before_all_statement],
+        after_all=[after_all_statement],
+    )
+
+    execute_calls: t.List[str] = []
+
+    original_duckdb_execute = DuckDBEngineAdapter.execute
+
+    def track_duckdb_execute(self, expression, **kwargs):
+        sql = expression if isinstance(expression, str) else expression.sql(dialect="duckdb")
+        state_tables = [
+            "_snapshots",
+            "_environments",
+            "_versions",
+            "_intervals",
+            "_auto_restatements",
+            "_environment_statements",
+        ]
+
+        # to ignore the state queries
+        if not any(table in sql.lower() for table in state_tables):
+            execute_calls.append(sql)
+
+        return original_duckdb_execute(self, expression, **kwargs)
+
+    ctx = Context(paths=[tmp_path], config=config)
+
+    # the plan would fail if the execution order ever changes and before_all statements don't execute first
+    ctx.plan(auto_apply=True, no_prompts=True)
+
+    mocker.patch.object(DuckDBEngineAdapter, "execute", track_duckdb_execute)
+
+    # run with the patched execute
+    ctx.run("prod", start="2023-01-01", end="2023-01-02")
+
+    # validate explicitly that the first execute is for the before_all
+    assert "before_all_created_table" in execute_calls[0]
+
+    # and that the last is the sole after_all statement, which depends on the model
+    assert "after_all_created_table" in execute_calls[-1]
+
+
+def test_auto_categorization(sushi_context: Context):
+    environment = "dev"
+    for config in sushi_context.configs.values():
+        config.plan.auto_categorize_changes.sql = AutoCategorizationMode.FULL
+    initial_add(sushi_context, environment)
+
+    version = sushi_context.get_snapshot(
+        "sushi.waiter_as_customer_by_day", raise_if_missing=True
+    ).version
+    fingerprint = sushi_context.get_snapshot(
+        "sushi.waiter_as_customer_by_day", raise_if_missing=True
+    ).fingerprint
+
+    model = t.cast(SqlModel, sushi_context.get_model("sushi.customers", raise_if_missing=True))
+    sushi_context.upsert_model(
+        "sushi.customers",
+        query_=ParsableSql(sql=model.query.select("'foo' AS foo").sql(dialect=model.dialect)),  # type: ignore
+    )
+    apply_to_environment(sushi_context, environment)
+
+    assert (
+        sushi_context.get_snapshot(
+            "sushi.waiter_as_customer_by_day", raise_if_missing=True
+        ).change_category
+        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
+    )
+    assert (
+        sushi_context.get_snapshot(
+            "sushi.waiter_as_customer_by_day", raise_if_missing=True
+        ).fingerprint
+        != fingerprint
+    )
+    assert (
+        sushi_context.get_snapshot("sushi.waiter_as_customer_by_day", raise_if_missing=True).version
+        == version
+    )
diff --git a/tests/core/integration/test_cron.py b/tests/core/integration/test_cron.py
new file mode 100644
index 0000000000..fa327ac36f
--- /dev/null
+++ b/tests/core/integration/test_cron.py
@@ -0,0 +1,247 @@
+from __future__ import annotations
+
+import typing as t
+import pytest
+import time_machine
+
+from sqlmesh.core import dialect as d
+from sqlmesh.core.model import (
+    SqlModel,
+    load_sql_based_model,
+)
+from sqlmesh.core.plan import SnapshotIntervals
+from sqlmesh.utils.date import to_timestamp
+from tests.core.integration.utils import add_projection_to_model
+
+pytestmark = pytest.mark.slow
+
+
+@time_machine.travel("2023-01-08 00:00:00 UTC")
+@pytest.mark.parametrize(
+    "forward_only, expected_intervals",
+    [
+        (
+            False,
+            [
+                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
+                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
+                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
+                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
+                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
+                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
+            ],
+        ),
+        (
+            True,
+            [
+                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
+            ],
+        ),
+    ],
+)
+def test_cron_not_aligned_with_day_boundary(
+    init_and_plan_context: t.Callable,
+    forward_only: bool,
+    expected_intervals: t.List[t.Tuple[int, int]],
+):
+    context, plan = init_and_plan_context("examples/sushi")
+
+    model = context.get_model("sushi.waiter_revenue_by_day")
+    model = SqlModel.parse_obj(
+        {
+            **model.dict(),
+            "kind": model.kind.copy(update={"forward_only": forward_only}),
+            "cron": "0 12 * * *",
+        }
+    )
+    context.upsert_model(model)
+
+    plan = context.plan_builder("prod", skip_tests=True).build()
+    context.apply(plan)
+
+    waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True)
+    assert waiter_revenue_by_day_snapshot.intervals == [
+
(to_timestamp("2023-01-01"), to_timestamp("2023-01-07")) + ] + + model = add_projection_to_model(t.cast(SqlModel, model), literal=True) + context.upsert_model(model) + + waiter_revenue_by_day_snapshot = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + + with time_machine.travel("2023-01-08 00:10:00 UTC"): # Past model's cron. + plan = context.plan_builder( + "dev", select_models=[model.name], skip_tests=True, enable_preview=True + ).build() + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=expected_intervals, + ), + ] + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_cron_not_aligned_with_day_boundary_new_model(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + existing_model = context.get_model("sushi.waiter_revenue_by_day") + existing_model = SqlModel.parse_obj( + { + **existing_model.dict(), + "kind": existing_model.kind.copy(update={"forward_only": True}), + } + ) + context.upsert_model(existing_model) + + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + # Add a new model and make a change to a forward-only model. + # The cron of the new model is not aligned with the day boundary. + new_model = load_sql_based_model( + d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind FULL, + cron '0 8 * * *', + start '2023-01-01', + ); + + SELECT 1 AS one; + """ + ) + ) + context.upsert_model(new_model) + + existing_model = add_projection_to_model(t.cast(SqlModel, existing_model), literal=True) + context.upsert_model(existing_model) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot( + "memory.sushi.new_model", raise_if_missing=True + ).snapshot_id, + intervals=[(to_timestamp("2023-01-06"), to_timestamp("2023-01-07"))], + ), + SnapshotIntervals( + snapshot_id=context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ).snapshot_id, + intervals=[ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + +@time_machine.travel("2023-01-08 00:00:00 UTC", tick=False) +def test_parent_cron_after_child(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + + model = context.get_model("sushi.waiter_revenue_by_day") + model = SqlModel.parse_obj( + { + **model.dict(), + "cron": "50 23 * * *", + } + ) + context.upsert_model(model) + + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True) + assert waiter_revenue_by_day_snapshot.intervals == [ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-07")) + ] + + top_waiters_model = context.get_model("sushi.top_waiters") + top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=True) + context.upsert_model(top_waiters_model) + + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + with time_machine.travel("2023-01-08 23:55:00 UTC"): # Past parent's cron, but before child's + plan = context.plan_builder("dev", skip_tests=True).build() + # Make sure the waiter_revenue_by_day model is not backfilled. 
+ assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + +@time_machine.travel("2025-03-08 00:00:00 UTC") +def test_tz(init_and_plan_context): + context, _ = init_and_plan_context("examples/sushi") + + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model( + SqlModel.parse_obj( + {**model.dict(), "cron_tz": "America/Los_Angeles", "start": "2025-03-07"} + ) + ) + + def assert_intervals(plan, intervals): + assert ( + next( + intervals.intervals + for intervals in plan.missing_intervals + if intervals.snapshot_id.name == model.fqn + ) + == intervals + ) + + plan = context.plan_builder("prod", skip_tests=True).build() + + # we have missing intervals but not waiter_revenue_by_day because it's not midnight pacific yet + assert plan.missing_intervals + + with pytest.raises(StopIteration): + assert_intervals(plan, []) + + # now we're ready 8AM UTC == midnight PST + with time_machine.travel("2025-03-08 08:00:00 UTC"): + plan = context.plan_builder("prod", skip_tests=True).build() + assert_intervals(plan, [(to_timestamp("2025-03-07"), to_timestamp("2025-03-08"))]) + + with time_machine.travel("2025-03-09 07:00:00 UTC"): + plan = context.plan_builder("prod", skip_tests=True).build() + + assert_intervals( + plan, + [ + (to_timestamp("2025-03-07"), to_timestamp("2025-03-08")), + ], + ) + + with time_machine.travel("2025-03-09 08:00:00 UTC"): + plan = context.plan_builder("prod", skip_tests=True).build() + + assert_intervals( + plan, + [ + (to_timestamp("2025-03-07"), to_timestamp("2025-03-08")), + (to_timestamp("2025-03-08"), to_timestamp("2025-03-09")), + ], + ) + + context.apply(plan) + + plan = context.plan_builder("prod", skip_tests=True).build() + assert not plan.missing_intervals diff --git a/tests/core/integration/test_dbt.py b/tests/core/integration/test_dbt.py new file mode 100644 index 0000000000..5e600899dd --- /dev/null +++ b/tests/core/integration/test_dbt.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +import typing as t +import pytest +from sqlmesh.core.model.common import ParsableSql +import time_machine + +from sqlmesh.core.context import Context +from sqlmesh.core.model import ( + IncrementalUnmanagedKind, +) +from sqlmesh.core.snapshot import ( + DeployabilityIndex, + SnapshotChangeCategory, +) + +if t.TYPE_CHECKING: + pass + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_dbt_select_star_is_directly_modified(sushi_test_dbt_context: Context): + context = sushi_test_dbt_context + + model = context.get_model("sushi.simple_model_a") + context.upsert_model( + model, + query_=ParsableSql(sql="SELECT 1 AS a, 2 AS b"), + ) + + snapshot_a_id = context.get_snapshot("sushi.simple_model_a").snapshot_id # type: ignore + snapshot_b_id = context.get_snapshot("sushi.simple_model_b").snapshot_id # type: ignore + + plan = context.plan_builder("dev", skip_tests=True).build() + assert plan.directly_modified == {snapshot_a_id, snapshot_b_id} + assert {i.snapshot_id for i in plan.missing_intervals} == {snapshot_a_id, 
snapshot_b_id} + + assert plan.snapshots[snapshot_a_id].change_category == SnapshotChangeCategory.NON_BREAKING + assert plan.snapshots[snapshot_b_id].change_category == SnapshotChangeCategory.NON_BREAKING + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_dbt_is_incremental_table_is_missing(sushi_test_dbt_context: Context): + context = sushi_test_dbt_context + + model = context.get_model("sushi.waiter_revenue_by_day_v2") + model = model.copy(update={"kind": IncrementalUnmanagedKind(), "start": "2023-01-01"}) + context.upsert_model(model) + context._standalone_audits["test_top_waiters"].start = "2023-01-01" + + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + snapshot = context.get_snapshot("sushi.waiter_revenue_by_day_v2") + assert snapshot + + # Manually drop the table + context.engine_adapter.drop_table(snapshot.table_name()) + + context.snapshot_evaluator.evaluate( + snapshot, + start="2023-01-01", + end="2023-01-08", + execution_time="2023-01-08 15:00:00", + snapshots={s.name: s for s in context.snapshots.values()}, + deployability_index=DeployabilityIndex.all_deployable(), + ) + + # Make sure the table was recreated + assert context.engine_adapter.table_exists(snapshot.table_name()) + + +def test_model_attr(sushi_test_dbt_context: Context, assert_exp_eq): + context = sushi_test_dbt_context + model = context.get_model("sushi.top_waiters") + assert_exp_eq( + model.render_query(), + """ + SELECT + CAST("waiter_id" AS INT) AS "waiter_id", + CAST("revenue" AS DOUBLE) AS "revenue", + 3 AS "model_columns" + FROM "memory"."sushi"."waiter_revenue_by_day_v2" AS "waiter_revenue_by_day_v2" + WHERE + "ds" = ( + SELECT + MAX("ds") + FROM "memory"."sushi"."waiter_revenue_by_day_v2" AS "waiter_revenue_by_day_v2" + ) + ORDER BY + "revenue" DESC NULLS FIRST + LIMIT 10 + """, + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_dbt_requirements(sushi_dbt_context: Context): + assert set(sushi_dbt_context.requirements) == {"dbt-core", "dbt-duckdb"} + assert sushi_dbt_context.requirements["dbt-core"].startswith("1.") + assert sushi_dbt_context.requirements["dbt-duckdb"].startswith("1.") + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_dbt_dialect_with_normalization_strategy(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context( + "tests/fixtures/dbt/sushi_test", config="test_config_with_normalization_strategy" + ) + assert context.default_dialect == "duckdb,normalization_strategy=LOWERCASE" + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_dbt_before_all_with_var_ref_source(init_and_plan_context: t.Callable): + _, plan = init_and_plan_context( + "tests/fixtures/dbt/sushi_test", config="test_config_with_normalization_strategy" + ) + environment_statements = plan.to_evaluatable().environment_statements + assert environment_statements + rendered_statements = [e.render_before_all(dialect="duckdb") for e in environment_statements] + assert rendered_statements[0] == [ + "CREATE TABLE IF NOT EXISTS analytic_stats (physical_table TEXT, evaluation_time TEXT)", + "CREATE TABLE IF NOT EXISTS to_be_executed_last (col TEXT)", + "SELECT 1 AS var, 'items' AS src, 'waiters' AS ref", + ] diff --git a/tests/core/integration/test_dev_only_vde.py b/tests/core/integration/test_dev_only_vde.py new file mode 100644 index 0000000000..611e207771 --- /dev/null +++ b/tests/core/integration/test_dev_only_vde.py @@ -0,0 +1,477 @@ +from __future__ import annotations + +import typing as t +import pytest +from sqlmesh.core.model.common 
import ParsableSql
+import time_machine
+
+from sqlmesh.core import dialect as d
+from sqlmesh.core.config.common import VirtualEnvironmentMode
+from sqlmesh.core.model import (
+    FullKind,
+    IncrementalUnmanagedKind,
+    SqlModel,
+    ViewKind,
+    load_sql_based_model,
+)
+from sqlmesh.core.plan import SnapshotIntervals
+from sqlmesh.core.snapshot import (
+    SnapshotChangeCategory,
+)
+from sqlmesh.utils.date import to_date, to_timestamp
+from tests.core.integration.utils import add_projection_to_model
+
+pytestmark = pytest.mark.slow
+
+
+@time_machine.travel("2023-01-08 15:00:00 UTC")
+def test_virtual_environment_mode_dev_only(init_and_plan_context: t.Callable):
+    context, _ = init_and_plan_context(
+        "examples/sushi", config="test_config_virtual_environment_mode_dev_only"
+    )
+
+    assert all(
+        s.virtual_environment_mode.is_dev_only or not s.is_model or s.is_symbolic
+        for s in context.snapshots.values()
+    )
+
+    # Init prod
+    context.plan("prod", auto_apply=True, no_prompts=True)
+
+    # Make a change in dev
+    original_model = context.get_model("sushi.waiter_revenue_by_day")
+    original_fingerprint = context.get_snapshot(original_model.name).fingerprint
+    model = original_model.copy(
+        update={
+            "query_": ParsableSql(
+                sql=original_model.query.order_by("waiter_id").sql(dialect=original_model.dialect)
+            )
+        }
+    )
+    model = add_projection_to_model(t.cast(SqlModel, model))
+    context.upsert_model(model)
+
+    plan_dev = context.plan_builder("dev").build()
+    assert to_timestamp(plan_dev.start) == to_timestamp("2023-01-07")
+    assert plan_dev.requires_backfill
+    assert plan_dev.missing_intervals == [
+        SnapshotIntervals(
+            snapshot_id=context.get_snapshot("sushi.top_waiters").snapshot_id,
+            intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))],
+        ),
+        SnapshotIntervals(
+            snapshot_id=context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id,
+            intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))],
+        ),
+    ]
+    assert plan_dev.context_diff.snapshots[context.get_snapshot(model.name).snapshot_id].intervals
+    assert plan_dev.context_diff.snapshots[
+        context.get_snapshot("sushi.top_waiters").snapshot_id
+    ].intervals
+    assert plan_dev.context_diff.snapshots[
+        context.get_snapshot(model.name).snapshot_id
+    ].dev_intervals
+    assert plan_dev.context_diff.snapshots[
+        context.get_snapshot("sushi.top_waiters").snapshot_id
+    ].dev_intervals
+    context.apply(plan_dev)
+
+    # Make sure the waiter_revenue_by_day model is a table in prod and a view in dev
+    table_types_df = context.engine_adapter.fetchdf(
+        "SELECT table_schema, table_type FROM INFORMATION_SCHEMA.TABLES WHERE table_name = 'waiter_revenue_by_day'"
+    )
+    assert table_types_df.to_dict("records") == [
+        {"table_schema": "sushi", "table_type": "BASE TABLE"},
+        {"table_schema": "sushi__dev", "table_type": "VIEW"},
+    ]
+
+    # Check that the specified dates were backfilled
+    min_event_date = context.engine_adapter.fetchone(
+        "SELECT MIN(event_date) FROM sushi__dev.waiter_revenue_by_day"
+    )[0]
+    assert min_event_date == to_date("2023-01-07")
+
+    # Make sure the changes are applied without backfill in prod
+    plan_prod = context.plan_builder("prod").build()
+    assert not plan_prod.requires_backfill
+    assert not plan_prod.missing_intervals
+    context.apply(plan_prod)
+    assert "one" in context.engine_adapter.columns("sushi.waiter_revenue_by_day")
+
+    # Make sure the revert of a breaking change results in a full rebuild
+    context.upsert_model(original_model)
+    assert context.get_snapshot(original_model.name).fingerprint ==
original_fingerprint + + plan_prod = context.plan_builder( + "prod", allow_destructive_models=["sushi.waiter_revenue_by_day"] + ).build() + assert not plan_prod.requires_backfill + assert not plan_prod.missing_intervals + context.apply(plan_prod) + assert "one" not in context.engine_adapter.columns("sushi.waiter_revenue_by_day") + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_kind_change(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.apply(plan) + + # Change to full kind + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": FullKind()}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.missing_intervals + assert prod_plan.requires_backfill + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "table" + + # Change back to view + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": ViewKind()}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "view" + + # Change to incremental + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": IncrementalUnmanagedKind()}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "table" + + # Change back to full + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": FullKind()}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "table" + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_kind_change_incremental( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + + forward_only_model_name = "memory.sushi.test_forward_only_model" + forward_only_model_expressions = d.parse( + f""" + MODEL ( + name {forward_only_model_name}, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + ), + ); + + SELECT '2023-01-01' AS 
ds, 'value' AS value; + """ + ) + forward_only_model = load_sql_based_model(forward_only_model_expressions) + forward_only_model = forward_only_model.copy( + update={"virtual_environment_mode": VirtualEnvironmentMode.DEV_ONLY} + ) + context.upsert_model(forward_only_model) + + context.plan("prod", auto_apply=True, no_prompts=True) + + # Change to view + model = context.get_model(forward_only_model_name) + original_kind = model.kind + model = model.copy(update={"kind": ViewKind()}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"test_forward_only_model"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "view" + + model = model.copy(update={"kind": original_kind}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"test_forward_only_model"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "table" + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_kind_change_with_follow_up_changes_in_dev( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.apply(plan) + + # Make sure the initial state is a view + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "view" + + # Change to incremental unmanaged kind + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": IncrementalUnmanagedKind()}) + context.upsert_model(model) + dev_plan = context.plan_builder("dev", skip_tests=True).build() + assert dev_plan.missing_intervals + assert dev_plan.requires_backfill + context.apply(dev_plan) + + # Make a follow-up forward-only change + model = add_projection_to_model(t.cast(SqlModel, model)) + context.upsert_model(model) + dev_plan = context.plan_builder("dev", skip_tests=True, forward_only=True).build() + context.apply(dev_plan) + + # Deploy to prod + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "table" + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_kind_change_manual_categorization( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.apply(plan) + + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": FullKind()}) + context.upsert_model(model) + dev_plan_builder = 
context.plan_builder("dev", skip_tests=True, no_auto_categorization=True) + dev_plan_builder.set_choice( + dev_plan_builder._context_diff.snapshots[context.get_snapshot(model.name).snapshot_id], + SnapshotChangeCategory.NON_BREAKING, + ) + dev_plan = dev_plan_builder.build() + assert dev_plan.requires_backfill + assert len(dev_plan.missing_intervals) == 1 + context.apply(dev_plan) + + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot("sushi.top_waiters").snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_seed_model_change( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.load() + context.plan("prod", auto_apply=True, no_prompts=True) + + seed_model = context.get_model("sushi.waiter_names") + with open(seed_model.seed_path, "a") as fd: + fd.write("\n123,New Test Name") + + context.load() + seed_model_snapshot = context.get_snapshot("sushi.waiter_names") + plan = context.plan_builder("dev").build() + assert plan.directly_modified == {seed_model_snapshot.snapshot_id} + assert len(plan.missing_intervals) == 2 + context.apply(plan) + + actual_seed_df_in_dev = context.fetchdf("SELECT * FROM sushi__dev.waiter_names WHERE id = 123") + assert actual_seed_df_in_dev.to_dict("records") == [{"id": 123, "name": "New Test Name"}] + actual_seed_df_in_prod = context.fetchdf("SELECT * FROM sushi.waiter_names WHERE id = 123") + assert actual_seed_df_in_prod.empty + + plan = context.plan_builder("prod").build() + assert plan.directly_modified == {seed_model_snapshot.snapshot_id} + assert len(plan.missing_intervals) == 1 + assert plan.missing_intervals[0].snapshot_id == seed_model_snapshot.snapshot_id + context.apply(plan) + + actual_seed_df_in_prod = context.fetchdf("SELECT * FROM sushi.waiter_names WHERE id = 123") + assert actual_seed_df_in_prod.to_dict("records") == [{"id": 123, "name": "New Test Name"}] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_change_downstream_of_seed( + init_and_plan_context: t.Callable, +): + """This test covers a scenario when a model downstream of a seed model is modified and explicitly selected + causing an (unhydrated) seed model to sourced from the state. If SQLMesh attempts to create + a table for the unchanged seed model, it will fail because the seed model is not hydrated. 
+ """ + context, _ = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.load() + context.plan("prod", auto_apply=True, no_prompts=True) + + # Make sure that a different version of the seed model is loaded + seed_model = context.get_model("sushi.waiter_names") + seed_model = seed_model.copy(update={"stamp": "force new version"}) + context.upsert_model(seed_model) + + # Make a change to the downstream model + model = context.get_model("sushi.waiter_as_customer_by_day") + model = model.copy(update={"stamp": "force new version"}) + context.upsert_model(model) + + # It is important to clear the cache so that the hydrated seed model is not sourced from the cache + context.clear_caches() + + # Make sure to use the selector so that the seed model is sourced from the state + plan = context.plan_builder("dev", select_models=[model.name]).build() + assert len(plan.directly_modified) == 1 + assert list(plan.directly_modified)[0].name == model.fqn + assert len(plan.missing_intervals) == 1 + assert plan.missing_intervals[0].snapshot_id.name == model.fqn + + # Make sure there's no error when applying the plan + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_change_standalone_audit( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.apply(plan) + + # Change a model upstream from a standalone audit + model = context.get_model("sushi.items") + model = model.copy(update={"stamp": "force new version"}) + context.upsert_model(model) + + plan = context.plan_builder("prod", skip_tests=True).build() + + # Make sure the standalone audit is among modified + assert ( + context.get_snapshot("assert_item_price_above_zero").snapshot_id + in plan.indirectly_modified[context.get_snapshot("sushi.items").snapshot_id] + ) + + # Make sure there's no error when applying the plan + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_seed_model_change_schema( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.apply(plan) + + new_csv = [] + with open(context.path / "seeds" / "waiter_names.csv", "r") as fd: + is_header = True + for idx, line in enumerate(fd): + line = line.strip() + if not line: + continue + if is_header: + new_csv.append(line + ",new_column") + is_header = False + else: + new_csv.append(line + f",v{idx}") + + with open(context.path / "seeds" / "waiter_names.csv", "w") as fd: + fd.write("\n".join(new_csv)) + + context.load() + + downstream_model = context.get_model("sushi.waiter_as_customer_by_day") + downstream_model_kind = downstream_model.kind.dict() + downstream_model_kwargs = { + **downstream_model.dict(), + "kind": { + **downstream_model_kind, + "on_destructive_change": "allow", + }, + "audits": [], + # Use the new column + "query": "SELECT '2023-01-07' AS event_date, new_column AS new_column FROM sushi.waiter_names", + } + context.upsert_model(SqlModel.parse_obj(downstream_model_kwargs)) + + context.plan("dev", auto_apply=True, no_prompts=True, skip_tests=True, enable_preview=True) + + assert ( + context.engine_adapter.fetchone( + "SELECT COUNT(*) FROM sushi__dev.waiter_as_customer_by_day" + )[0] + == len(new_csv) - 1 + ) + + # Deploy to prod + context.clear_caches() + 
context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + assert "new_column" in context.engine_adapter.columns("sushi.waiter_as_customer_by_day") diff --git a/tests/core/integration/test_forward_only.py b/tests/core/integration/test_forward_only.py new file mode 100644 index 0000000000..4d61915305 --- /dev/null +++ b/tests/core/integration/test_forward_only.py @@ -0,0 +1,1510 @@ +from __future__ import annotations + +import typing as t +import numpy as np # noqa: TID253 +import pandas as pd # noqa: TID253 +import pytest +import time_machine +from pytest_mock.plugin import MockerFixture + +from sqlmesh.core import dialect as d +from sqlmesh.core.context import Context +from sqlmesh.core.config.categorizer import CategorizerConfig +from sqlmesh.core.model import ( + FullKind, + SqlModel, + load_sql_based_model, +) +from sqlmesh.core.plan import PlanBuilder, SnapshotIntervals +from sqlmesh.core.snapshot import ( + SnapshotChangeCategory, +) +from sqlmesh.utils.date import to_datetime, to_timestamp +from tests.core.integration.utils import add_projection_to_model + +pytestmark = pytest.mark.slow + + +@pytest.fixture(autouse=True) +def mock_choices(mocker: MockerFixture): + mocker.patch("sqlmesh.core.console.TerminalConsole._get_snapshot_change_category") + mocker.patch("sqlmesh.core.console.TerminalConsole._prompt_backfill") + + +def plan_choice(plan_builder: PlanBuilder, choice: SnapshotChangeCategory) -> None: + for snapshot in plan_builder.build().snapshots.values(): + if not snapshot.version: + plan_builder.set_choice(snapshot, choice) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +@pytest.mark.parametrize( + "context_fixture", + ["sushi_context", "sushi_no_default_catalog"], +) +def test_forward_only_plan_with_effective_date(context_fixture: Context, request): + context = request.getfixturevalue(context_fixture) + model_name = "sushi.waiter_revenue_by_day" + model = context.get_model(model_name) + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)), start="2023-01-01") + snapshot = context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan_builder = context.plan_builder("dev", skip_tests=True, forward_only=True) + plan = plan_builder.build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only + assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only + + assert to_timestamp(plan.start) == to_timestamp("2023-01-07") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], + ), + ] + + plan = plan_builder.set_effective_from("2023-01-05").build() + # Default start should be set to effective_from + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-05"), 
to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + plan = plan_builder.set_start("2023-01-06").build() + # Start override should take precedence + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + plan = plan_builder.set_effective_from("2023-01-04").build() + # Start should remain unchanged + assert plan.start == "2023-01-06" + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert dev_df["event_date"].tolist() == [ + pd.to_datetime("2023-01-06"), + pd.to_datetime("2023-01-07"), + ] + + prod_plan = context.plan_builder(skip_tests=True).build() + # Make sure that the previously set effective_from is respected + assert prod_plan.start == to_timestamp("2023-01-04") + assert prod_plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(prod_plan) + + prod_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" + ) + assert prod_df["event_date"].tolist() == [ + pd.to_datetime(x) for x in ["2023-01-04", "2023-01-05", "2023-01-06", "2023-01-07"] + ] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_forward_only_model_regular_plan(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model_name = "sushi.waiter_revenue_by_day" + + model = context.get_model(model_name) + model = add_projection_to_model(t.cast(SqlModel, model)) + forward_only_kind = model.kind.copy(update={"forward_only": True}) + model = model.copy(update={"kind": forward_only_kind}) + + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert 
len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only + assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only + + assert plan.start == to_datetime("2023-01-01") + assert not plan.missing_intervals + + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert not dev_df["event_date"].tolist() + + # Run a restatement plan to preview changes + plan_builder = context.plan_builder( + "dev", skip_tests=True, restate_models=[model_name], enable_preview=False + ) + plan_builder.set_start("2023-01-06") + assert plan_builder.build().missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Make sure that changed start is reflected in missing intervals + plan_builder.set_start("2023-01-07") + assert plan_builder.build().missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan_builder.build()) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")] + + # Promote changes to prod + prod_plan = context.plan_builder(skip_tests=True).build() + assert not prod_plan.missing_intervals + + context.apply(prod_plan) + + # The change was applied in a forward-only manner so no values in the new column should be populated + prod_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" + ) + assert not prod_df["event_date"].tolist() + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_forward_only_model_regular_plan_preview_enabled(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model_name = "sushi.waiter_revenue_by_day" + + model = 
context.get_model(model_name) + model = add_projection_to_model(t.cast(SqlModel, model)) + forward_only_kind = model.kind.copy(update={"forward_only": True}) + model = model.copy(update={"kind": forward_only_kind}) + + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only + assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only + + assert to_timestamp(plan.start) == to_timestamp("2023-01-07") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_forward_only_model_restate_full_history_in_dev(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + model_name = "memory.sushi.customer_max_revenue" + expressions = d.parse( + f""" + MODEL ( + name {model_name}, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key customer_id, + forward_only true, + ), + ); + + SELECT + customer_id, MAX(revenue) AS max_revenue + FROM memory.sushi.customer_revenue_lifetime + GROUP BY 1; + """ + ) + + model = load_sql_based_model(expressions) + assert model.forward_only + assert model.kind.full_history_restatement_only + context.upsert_model(model) + + context.plan("prod", skip_tests=True, auto_apply=True, enable_preview=False) + + model_kwargs = { + **model.dict(), + # Make a breaking change. 
+ "query": model.query.order_by("customer_id"), # type: ignore + } + context.upsert_model(SqlModel.parse_obj(model_kwargs)) + + # Apply the model change in dev + plan = context.plan_builder( + "dev", + skip_tests=True, + enable_preview=False, + categorizer_config=CategorizerConfig.all_full(), + ).build() + assert not plan.missing_intervals + context.apply(plan) + + snapshot = context.get_snapshot(model, raise_if_missing=True) + snapshot_table_name = snapshot.table_name(False) + + # Manually insert a dummy value to check that the table is recreated during the restatement + context.engine_adapter.insert_append( + snapshot_table_name, + pd.DataFrame({"customer_id": [-1], "max_revenue": [100]}), + ) + df = context.engine_adapter.fetchdf( + "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue WHERE customer_id = -1" + ) + assert df["cnt"][0] == 1 + + # Apply a restatement plan in dev + plan = context.plan("dev", restate_models=[model.name], auto_apply=True, enable_preview=False) + assert len(plan.missing_intervals) == 1 + + # Check that the dummy value is not present + df = context.engine_adapter.fetchdf( + "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue WHERE customer_id = -1" + ) + assert df["cnt"][0] == 0 + + # Check that the table is not empty + df = context.engine_adapter.fetchdf( + "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue" + ) + assert df["cnt"][0] > 0 + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_full_history_restatement_model_regular_plan_preview_enabled( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model_name = "sushi.marketing" # SCD2 model + + model = context.get_model(model_name) + model = add_projection_to_model(t.cast(SqlModel, model)) + + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + customers_snapshot = context.get_snapshot("sushi.customers", raise_if_missing=True) + active_customers_snapshot = context.get_snapshot( + "sushi.active_customers", raise_if_missing=True + ) + waiter_as_customer_snapshot = context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() + + assert len(plan.new_snapshots) == 6 + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[customers_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[active_customers_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[waiter_as_customer_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert all(s.is_forward_only for s in plan.new_snapshots) + + assert to_timestamp(plan.start) == to_timestamp("2023-01-07") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_metadata_changed_regular_plan_preview_enabled(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model_name = "sushi.waiter_revenue_by_day" + + model = 
context.get_model(model_name) + model = model.copy(update={"owner": "new_owner"}) + + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.METADATA + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.METADATA + ) + assert not plan.missing_intervals + assert not plan.restatements + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_forward_only_preview_child_that_runs_before_parent(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + # This model runs at minute 30 of every hour + upstream_model = load_sql_based_model( + d.parse( + """ + MODEL ( + name memory.sushi.upstream_model, + kind FULL, + cron '30 * * * *', + start '2023-01-01', + ); + + SELECT 1 AS a; + """ + ) + ) + context.upsert_model(upstream_model) + + # This model runs at minute 0 of every hour, so it runs before the upstream model + downstream_model = load_sql_based_model( + d.parse( + """ + MODEL ( + name memory.sushi.downstream_model, + kind INCREMENTAL_BY_TIME_RANGE( + time_column event_date, + forward_only True, + ), + cron '0 * * * *', + start '2023-01-01', + ); + + SELECT a, '2023-01-06' AS event_date FROM memory.sushi.upstream_model; + """ + ) + ) + context.upsert_model(downstream_model) + + context.plan("prod", skip_tests=True, auto_apply=True) + + with time_machine.travel("2023-01-08 00:05:00 UTC"): + # The downstream model runs but not the upstream model + context.run("prod") + + # Now it's time for the upstream model to run but it hasn't run yet + with time_machine.travel("2023-01-08 00:35:00 UTC"): + # Make a change to the downstream model. 
+ downstream_model = add_projection_to_model(t.cast(SqlModel, downstream_model), literal=True) + context.upsert_model(downstream_model) + + # The plan should only backfill the downstream model despite upstream missing intervals + plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot( + downstream_model.name, raise_if_missing=True + ).snapshot_id, + intervals=[ + (to_timestamp("2023-01-07 23:00:00"), to_timestamp("2023-01-08 00:00:00")) + ], + ), + ] + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_forward_only_monthly_model(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + model = context.get_model("sushi.waiter_revenue_by_day") + model = SqlModel.parse_obj( + { + **model.dict(), + "kind": model.kind.copy(update={"forward_only": True}), + "cron": "0 0 1 * *", + "start": "2022-01-01", + "audits": [], + } + ) + context.upsert_model(model) + + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True) + assert waiter_revenue_by_day_snapshot.intervals == [ + (to_timestamp("2022-01-01"), to_timestamp("2023-01-01")) + ] + + model = add_projection_to_model(t.cast(SqlModel, model), literal=True) + context.upsert_model(model) + + waiter_revenue_by_day_snapshot = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + + plan = context.plan_builder( + "dev", select_models=[model.name], skip_tests=True, enable_preview=True + ).build() + assert to_timestamp(plan.start) == to_timestamp("2022-12-01") + assert to_timestamp(plan.end) == to_timestamp("2023-01-08") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[(to_timestamp("2022-12-01"), to_timestamp("2023-01-01"))], + ), + ] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_forward_only_parent_created_in_dev_child_created_in_prod( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + waiter_revenue_by_day_model = context.get_model("sushi.waiter_revenue_by_day") + waiter_revenue_by_day_model = add_projection_to_model( + t.cast(SqlModel, waiter_revenue_by_day_model) + ) + forward_only_kind = waiter_revenue_by_day_model.kind.copy(update={"forward_only": True}) + waiter_revenue_by_day_model = waiter_revenue_by_day_model.copy( + update={"kind": forward_only_kind} + ) + context.upsert_model(waiter_revenue_by_day_model) + + waiter_revenue_by_day_snapshot = context.get_snapshot( + waiter_revenue_by_day_model, raise_if_missing=True + ) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert all(s.is_forward_only for s in plan.new_snapshots) + assert plan.start == to_datetime("2023-01-01") + assert not plan.missing_intervals + + context.apply(plan) + + # Update the child to refer to a newly added column. 
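+    # The child change on its own is non-breaking, so it can be planned directly against prod.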
+ top_waiters_model = context.get_model("sushi.top_waiters") + top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=False) + context.upsert_model(top_waiters_model) + + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("prod", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 1 + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_forward_only_view_migration( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model = context.get_model("sushi.top_waiters") + assert model.kind.is_view + model = add_projection_to_model(t.cast(SqlModel, model)) + context.upsert_model(model) + + # Apply a forward-only plan + context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True, forward_only=True) + + # Make sure that the new column got reflected in the view schema + df = context.fetchdf("SELECT one FROM sushi.top_waiters LIMIT 1") + assert len(df) == 1 + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_new_forward_only_model(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + context.plan("dev", skip_tests=True, no_prompts=True, auto_apply=True, enable_preview=False) + + snapshot = context.get_snapshot("sushi.marketing") + + # The deployable table should not exist yet + assert not context.engine_adapter.table_exists(snapshot.table_name()) + assert context.engine_adapter.table_exists(snapshot.table_name(is_deployable=False)) + + context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True) + + assert context.engine_adapter.table_exists(snapshot.table_name()) + assert context.engine_adapter.table_exists(snapshot.table_name(is_deployable=False)) + + +@time_machine.travel("2023-01-08 15:00:00 UTC", tick=True) +@pytest.mark.parametrize("has_view_binding", [False, True]) +def test_non_breaking_change_after_forward_only_in_dev( + init_and_plan_context: t.Callable, has_view_binding: bool +): + context, plan = init_and_plan_context("examples/sushi") + context.snapshot_evaluator.adapter.HAS_VIEW_BINDING = has_view_binding + context.apply(plan) + + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + waiter_revenue_by_day_snapshot = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, forward_only=True).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert all(s.is_forward_only for s in plan.new_snapshots) + assert to_timestamp(plan.start) == to_timestamp("2023-01-07") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], + ), + ] + + # Apply the forward-only changes first. 
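+    # Only the latest interval (2023-01-07) is backfilled into the dev preview.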
+ context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")] + + # Make a non-breaking change to a model downstream. + model = context.get_model("sushi.top_waiters") + # Select 'one' column from the updated upstream model. + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model), literal=False)) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True).build() + assert len(plan.new_snapshots) == 1 + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert to_timestamp(plan.start) == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Apply the non-breaking changes. + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT waiter_id FROM sushi__dev.top_waiters WHERE one IS NOT NULL" + ) + assert not dev_df.empty + + prod_df = context.engine_adapter.fetchdf("DESCRIBE sushi.top_waiters") + assert "one" not in prod_df["column_name"].tolist() + + # Deploy both changes to prod. + plan = context.plan_builder("prod", skip_tests=True).build() + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + + prod_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" + ) + assert prod_df.empty + + prod_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT waiter_id FROM sushi.top_waiters WHERE one IS NOT NULL" + ) + assert prod_df.empty + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_indirect_non_breaking_change_after_forward_only_in_dev(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + # Make sure that the most downstream model is a materialized model. + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": FullKind()}) + context.upsert_model(model) + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + + # Make sushi.orders a forward-only model. 
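+    # A breaking change to a forward-only model reuses the existing table, so no backfill is required.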
+ model = context.get_model("sushi.orders") + updated_model_kind = model.kind.copy(update={"forward_only": True}) + model = model.copy(update={"stamp": "force new version", "kind": updated_model_kind}) + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + + plan = context.plan_builder( + "dev", + skip_tests=True, + enable_preview=False, + categorizer_config=CategorizerConfig.all_full(), + ).build() + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only + assert not plan.requires_backfill + context.apply(plan) + + # Make a non-breaking change to a model. + model = context.get_model("sushi.top_waiters") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 1 + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Apply the non-breaking changes. + context.apply(plan) + + # Make a non-breaking change upstream from the previously modified model. + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + waiter_revenue_by_day_snapshot = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Apply the upstream non-breaking changes. + context.apply(plan) + assert not context.plan_builder("dev", skip_tests=True).build().requires_backfill + + # Deploy everything to prod. 
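+    # Promotion to prod backfills the full history of both non-breaking snapshots.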
+ plan = context.plan_builder("prod", skip_tests=True, enable_preview=False).build() + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + assert ( + not context.plan_builder("prod", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_changes_downstream_of_indirect_non_breaking_snapshot_without_intervals( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Make a breaking change first but don't backfill it + model = context.get_model("sushi.orders") + model = model.copy(update={"stamp": "force new version"}) + context.upsert_model(model) + plan_builder = context.plan_builder( + "dev", skip_backfill=True, skip_tests=True, no_auto_categorization=True + ) + plan_builder.set_choice(context.get_snapshot(model), SnapshotChangeCategory.BREAKING) + context.apply(plan_builder.build()) + + # Now make a non-breaking change to the same snapshot. + model = model.copy(update={"stamp": "force another new version"}) + context.upsert_model(model) + plan_builder = context.plan_builder( + "dev", skip_backfill=True, skip_tests=True, no_auto_categorization=True + ) + plan_builder.set_choice(context.get_snapshot(model), SnapshotChangeCategory.NON_BREAKING) + context.apply(plan_builder.build()) + + # Now make a change to a model downstream of the above model. 
+ downstream_model = context.get_model("sushi.top_waiters") + downstream_model = downstream_model.copy(update={"stamp": "yet another new version"}) + context.upsert_model(downstream_model) + plan = context.plan_builder("dev", skip_tests=True).build() + + # If the parent is not representative then the child cannot be deployable + deployability_index = plan.deployability_index + assert not deployability_index.is_representative( + context.get_snapshot("sushi.waiter_revenue_by_day") + ) + assert not deployability_index.is_deployable(context.get_snapshot("sushi.top_waiters")) + + +@time_machine.travel("2023-01-08 15:00:00 UTC", tick=True) +def test_metadata_change_after_forward_only_results_in_migration(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Make a forward-only change + model = context.get_model("sushi.waiter_revenue_by_day") + model = model.copy(update={"kind": model.kind.copy(update={"forward_only": True})}) + model = add_projection_to_model(t.cast(SqlModel, model)) + context.upsert_model(model) + plan = context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True) + assert len(plan.new_snapshots) == 2 + assert all(s.is_forward_only for s in plan.new_snapshots) + + # Follow-up with a metadata change in the same environment + model = model.copy(update={"owner": "new_owner"}) + context.upsert_model(model) + plan = context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True) + assert len(plan.new_snapshots) == 2 + assert all(s.change_category == SnapshotChangeCategory.METADATA for s in plan.new_snapshots) + + # Deploy the latest change to prod + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + + # Check that the new column was added in prod + columns = context.engine_adapter.columns("sushi.waiter_revenue_by_day") + assert "one" in columns + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_indirect_non_breaking_downstream_of_forward_only(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Make sushi.orders a forward-only model. 
+ forward_only_model = context.get_model("sushi.orders") + updated_model_kind = forward_only_model.kind.copy(update={"forward_only": True}) + forward_only_model = forward_only_model.copy( + update={"stamp": "force new version", "kind": updated_model_kind} + ) + context.upsert_model(forward_only_model) + forward_only_snapshot = context.get_snapshot(forward_only_model, raise_if_missing=True) + + non_breaking_model = context.get_model("sushi.waiter_revenue_by_day") + non_breaking_model = non_breaking_model.copy(update={"start": "2023-01-01"}) + context.upsert_model(add_projection_to_model(t.cast(SqlModel, non_breaking_model))) + non_breaking_snapshot = context.get_snapshot(non_breaking_model, raise_if_missing=True) + top_waiter_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder( + "dev", + skip_tests=True, + enable_preview=False, + categorizer_config=CategorizerConfig.all_full(), + ).build() + assert ( + plan.context_diff.snapshots[forward_only_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert ( + plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.context_diff.snapshots[forward_only_snapshot.snapshot_id].is_forward_only + assert not plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].is_forward_only + assert not plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].is_forward_only + + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiter_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=non_breaking_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + assert ( + not context.plan_builder("dev", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + # Deploy everything to prod. 
+ plan = context.plan_builder("prod", skip_tests=True).build() + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiter_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=non_breaking_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + assert ( + not context.plan_builder("prod", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_indirect_non_breaking_view_model_non_representative_snapshot( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context("examples/sushi") + + # Forward-only parent + forward_only_model_name = "memory.sushi.test_forward_only_model" + forward_only_model_expressions = d.parse( + f""" + MODEL ( + name {forward_only_model_name}, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + ), + ); + + SELECT '2023-01-01' AS ds, 'value' AS value; + """ + ) + forward_only_model = load_sql_based_model(forward_only_model_expressions) + assert forward_only_model.forward_only + context.upsert_model(forward_only_model) + + # FULL downstream model. + full_downstream_model_name = "memory.sushi.test_full_downstream_model" + full_downstream_model_expressions = d.parse( + f""" + MODEL ( + name {full_downstream_model_name}, + kind FULL, + ); + + SELECT ds, value FROM {forward_only_model_name}; + """ + ) + full_downstream_model = load_sql_based_model(full_downstream_model_expressions) + context.upsert_model(full_downstream_model) + + # VIEW downstream of the previous FULL model. + view_downstream_model_name = "memory.sushi.test_view_downstream_model" + view_downstream_model_expressions = d.parse( + f""" + MODEL ( + name {view_downstream_model_name}, + kind VIEW, + ); + + SELECT ds, value FROM {full_downstream_model_name}; + """ + ) + view_downstream_model = load_sql_based_model(view_downstream_model_expressions) + context.upsert_model(view_downstream_model) + + # Apply the initial plan with all 3 models. + context.plan(auto_apply=True, no_prompts=True) + + # Make a change to the forward-only model and apply it in dev. 
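+    # With preview disabled, the forward-only change is expected to produce no missing intervals in dev.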
+ context.upsert_model(add_projection_to_model(t.cast(SqlModel, forward_only_model))) + forward_only_model_snapshot_id = context.get_snapshot(forward_only_model_name).snapshot_id + full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id + view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id + dev_plan = context.plan("dev", auto_apply=True, no_prompts=True, enable_preview=False) + assert ( + dev_plan.snapshots[forward_only_model_snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + dev_plan.snapshots[full_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert ( + dev_plan.snapshots[view_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert not dev_plan.missing_intervals + + # Make a follow-up breaking change to the downstream full model. + new_full_downstream_model_expressions = d.parse( + f""" + MODEL ( + name {full_downstream_model_name}, + kind FULL, + ); + + SELECT ds, 'new_value' AS value FROM {forward_only_model_name}; + """ + ) + new_full_downstream_model = load_sql_based_model(new_full_downstream_model_expressions) + context.upsert_model(new_full_downstream_model) + full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id + view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id + dev_plan = context.plan( + "dev", + categorizer_config=CategorizerConfig.all_full(), + auto_apply=True, + no_prompts=True, + enable_preview=False, + ) + assert ( + dev_plan.snapshots[full_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert ( + dev_plan.snapshots[view_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_BREAKING + ) + assert len(dev_plan.missing_intervals) == 2 + assert dev_plan.missing_intervals[0].snapshot_id == full_downstream_model_snapshot_id + assert dev_plan.missing_intervals[1].snapshot_id == view_downstream_model_snapshot_id + + # Check that the representative view hasn't been created yet. + assert not context.engine_adapter.table_exists( + context.get_snapshot(view_downstream_model_name).table_name() + ) + + # Now promote the very first change to prod without promoting the 2nd breaking change. + context.upsert_model(full_downstream_model) + context.plan(auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full()) + + # Finally, make a non-breaking change to the full model in the same dev environment. + context.upsert_model(add_projection_to_model(t.cast(SqlModel, new_full_downstream_model))) + full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id + view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id + dev_plan = context.plan( + "dev", + categorizer_config=CategorizerConfig.all_full(), + auto_apply=True, + no_prompts=True, + enable_preview=False, + ) + assert ( + dev_plan.snapshots[full_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + dev_plan.snapshots[view_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + + # Deploy changes to prod + context.plan("prod", auto_apply=True, no_prompts=True) + + # Check that the representative view has been created. 
+    assert context.engine_adapter.table_exists(
+        context.get_snapshot(view_downstream_model_name).table_name()
+    )
+
+
+@time_machine.travel("2023-01-08 15:00:00 UTC")
+def test_indirect_non_breaking_view_model_non_representative_snapshot_migration(
+    init_and_plan_context: t.Callable,
+):
+    context, _ = init_and_plan_context("examples/sushi")
+
+    forward_only_model_expr = d.parse(
+        """
+        MODEL (
+            name memory.sushi.forward_only_model,
+            kind INCREMENTAL_BY_TIME_RANGE (
+                time_column ds,
+                forward_only TRUE,
+                on_destructive_change 'allow',
+            ),
+        );
+
+        SELECT '2023-01-07' AS ds, 1 AS a;
+        """
+    )
+    forward_only_model = load_sql_based_model(forward_only_model_expr)
+    context.upsert_model(forward_only_model)
+
+    downstream_view_a_expr = d.parse(
+        """
+        MODEL (
+            name memory.sushi.downstream_view_a,
+            kind VIEW,
+        );
+
+        SELECT a from memory.sushi.forward_only_model;
+        """
+    )
+    downstream_view_a = load_sql_based_model(downstream_view_a_expr)
+    context.upsert_model(downstream_view_a)
+
+    downstream_view_b_expr = d.parse(
+        """
+        MODEL (
+            name memory.sushi.downstream_view_b,
+            kind VIEW,
+        );
+
+        SELECT a from memory.sushi.downstream_view_a;
+        """
+    )
+    downstream_view_b = load_sql_based_model(downstream_view_b_expr)
+    context.upsert_model(downstream_view_b)
+
+    context.plan(auto_apply=True, no_prompts=True, skip_tests=True)
+
+    # Make a forward-only change
+    context.upsert_model(add_projection_to_model(t.cast(SqlModel, forward_only_model)))
+    # Make a non-breaking change downstream
+    context.upsert_model(add_projection_to_model(t.cast(SqlModel, downstream_view_a)))
+
+    context.plan(auto_apply=True, no_prompts=True, skip_tests=True)
+
+    # Make sure the downstream indirect non-breaking view is available in prod
+    count = context.engine_adapter.fetchone("SELECT COUNT(*) FROM memory.sushi.downstream_view_b")[
+        0
+    ]
+    assert count > 0
+
+
+@time_machine.travel("2023-01-08 15:00:00 UTC")
+def test_new_forward_only_model_concurrent_versions(init_and_plan_context: t.Callable):
+    context, plan = init_and_plan_context("examples/sushi")
+    context.apply(plan)
+
+    new_model_expr = d.parse(
+        """
+        MODEL (
+            name memory.sushi.new_model,
+            kind INCREMENTAL_BY_TIME_RANGE (
+                time_column ds,
+                forward_only TRUE,
+                on_destructive_change 'allow',
+            ),
+        );
+
+        SELECT '2023-01-07' AS ds, 1 AS a;
+        """
+    )
+    new_model = load_sql_based_model(new_model_expr)
+
+    # Add the first version of the model and apply it to dev_a.
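+    # A newly added model is categorized as BREAKING even though its kind is forward-only.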
+ context.upsert_model(new_model) + snapshot_a = context.get_snapshot(new_model.name) + plan_a = context.plan_builder("dev_a").build() + snapshot_a = plan_a.snapshots[snapshot_a.snapshot_id] + + assert snapshot_a.snapshot_id in plan_a.context_diff.new_snapshots + assert snapshot_a.snapshot_id in plan_a.context_diff.added + assert snapshot_a.change_category == SnapshotChangeCategory.BREAKING + + context.apply(plan_a) + + new_model_alt_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only TRUE, + on_destructive_change 'allow', + ), + ); + + SELECT '2023-01-07' AS ds, 1 AS b; + """ + ) + new_model_alt = load_sql_based_model(new_model_alt_expr) + + # Add the second version of the model but don't apply it yet + context.upsert_model(new_model_alt) + snapshot_b = context.get_snapshot(new_model_alt.name) + plan_b = context.plan_builder("dev_b").build() + snapshot_b = plan_b.snapshots[snapshot_b.snapshot_id] + + assert snapshot_b.snapshot_id in plan_b.context_diff.new_snapshots + assert snapshot_b.snapshot_id in plan_b.context_diff.added + assert snapshot_b.change_category == SnapshotChangeCategory.BREAKING + + assert snapshot_b.fingerprint != snapshot_a.fingerprint + assert snapshot_b.version == snapshot_a.version + + # Apply the 1st version to prod + context.upsert_model(new_model) + plan_prod_a = context.plan_builder("prod").build() + assert snapshot_a.snapshot_id in plan_prod_a.snapshots + assert ( + plan_prod_a.snapshots[snapshot_a.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + context.apply(plan_prod_a) + + df = context.fetchdf("SELECT * FROM memory.sushi.new_model") + assert df.to_dict() == {"ds": {0: "2023-01-07"}, "a": {0: 1}} + + # Modify the 1st version in prod to trigger a forward-only change + new_model = add_projection_to_model(t.cast(SqlModel, new_model)) + context.upsert_model(new_model) + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + # Apply the 2nd version to dev_b. + # At this point the snapshot of the 2nd version has already been categorized but not + # persisted in the state. This means that when the snapshot of the 1st version was + # being unpaused during promotion to prod, the state of the 2nd version snapshot was not updated + context.apply(plan_b) + + # Apply the 2nd version to prod + context.upsert_model(new_model_alt) + plan_prod_b = context.plan_builder("prod").build() + assert ( + plan_prod_b.snapshots[snapshot_b.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert not plan_prod_b.requires_backfill + context.apply(plan_prod_b) + + df = context.fetchdf("SELECT * FROM memory.sushi.new_model").replace({np.nan: None}) + assert df.to_dict() == {"ds": {0: "2023-01-07"}, "b": {0: None}} + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_new_forward_only_model_same_dev_environment(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + new_model_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only TRUE, + on_destructive_change 'allow', + ), + ); + + SELECT '2023-01-07' AS ds, 1 AS a; + """ + ) + new_model = load_sql_based_model(new_model_expr) + + # Add the first version of the model and apply it to dev. 
+ context.upsert_model(new_model) + snapshot_a = context.get_snapshot(new_model.name) + plan_a = context.plan_builder("dev").build() + snapshot_a = plan_a.snapshots[snapshot_a.snapshot_id] + + assert snapshot_a.snapshot_id in plan_a.context_diff.new_snapshots + assert snapshot_a.snapshot_id in plan_a.context_diff.added + assert snapshot_a.change_category == SnapshotChangeCategory.BREAKING + + context.apply(plan_a) + + df = context.fetchdf("SELECT * FROM memory.sushi__dev.new_model") + assert df.to_dict() == {"ds": {0: "2023-01-07"}, "a": {0: 1}} + + new_model_alt_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only TRUE, + on_destructive_change 'allow', + ), + ); + + SELECT '2023-01-07' AS ds, 1 AS b; + """ + ) + new_model_alt = load_sql_based_model(new_model_alt_expr) + + # Add the second version of the model and apply it to the same environment. + context.upsert_model(new_model_alt) + snapshot_b = context.get_snapshot(new_model_alt.name) + + context.invalidate_environment("dev", sync=True) + plan_b = context.plan_builder("dev").build() + snapshot_b = plan_b.snapshots[snapshot_b.snapshot_id] + + context.apply(plan_b) + + df = context.fetchdf("SELECT * FROM memory.sushi__dev.new_model").replace({np.nan: None}) + assert df.to_dict() == {"ds": {0: "2023-01-07"}, "b": {0: 1}} diff --git a/tests/core/integration/test_model_kinds.py b/tests/core/integration/test_model_kinds.py new file mode 100644 index 0000000000..1cc1bf7aeb --- /dev/null +++ b/tests/core/integration/test_model_kinds.py @@ -0,0 +1,2644 @@ +from __future__ import annotations + +import typing as t +from collections import Counter +from datetime import timedelta +from unittest import mock +import pandas as pd # noqa: TID253 +import pytest +from pathlib import Path +import time_machine +from pytest_mock.plugin import MockerFixture +from sqlglot import exp + +from sqlmesh import CustomMaterialization +from sqlmesh.core import dialect as d +from sqlmesh.core.config import ( + Config, + ModelDefaultsConfig, + DuckDBConnectionConfig, + GatewayConfig, +) +from sqlmesh.core.console import Console +from sqlmesh.core.context import Context +from sqlmesh.core.config.categorizer import CategorizerConfig +from sqlmesh.core.model import ( + Model, + SqlModel, + CustomKind, + load_sql_based_model, +) +from sqlmesh.core.plan import SnapshotIntervals +from sqlmesh.utils.date import to_date, to_timestamp +from sqlmesh.utils.pydantic import validate_string +from tests.conftest import SushiDataValidator +from sqlmesh.utils import CorrelationId +from tests.utils.test_filesystem import create_temp_file + +if t.TYPE_CHECKING: + from sqlmesh import QueryOrDF + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_incremental_by_partition(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + source_name = "raw.test_incremental_by_partition" + model_name = "memory.sushi.test_incremental_by_partition" + + expressions = d.parse( + f""" + MODEL ( + name {model_name}, + kind INCREMENTAL_BY_PARTITION (disable_restatement false), + partitioned_by [key], + allow_partials true, + start '2023-01-07', + ); + + SELECT key, value FROM {source_name}; + """ + ) + model = load_sql_based_model(expressions) + context.upsert_model(model) + + context.engine_adapter.ctas( + source_name, + d.parse_one("SELECT 'key_a' AS key, 1 AS value"), + ) + + context.plan(auto_apply=True, no_prompts=True) + 
assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [
+        ("key_a", 1),
+    ]
+
+    context.engine_adapter.replace_query(
+        source_name,
+        d.parse_one("SELECT 'key_b' AS key, 1 AS value"),
+    )
+    context.run(ignore_cron=True)
+    assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [
+        ("key_a", 1),
+        ("key_b", 1),
+    ]
+
+    context.engine_adapter.replace_query(
+        source_name,
+        d.parse_one("SELECT 'key_a' AS key, 2 AS value"),
+    )
+    # Run 1 minute later.
+    with time_machine.travel("2023-01-08 15:01:00 UTC"):
+        context.run(ignore_cron=True)
+    assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [
+        ("key_b", 1),
+        ("key_a", 2),
+    ]
+
+    # The model should fully refresh on restatement
+    context.engine_adapter.replace_query(
+        source_name,
+        d.parse_one("SELECT 'key_c' AS key, 3 AS value"),
+    )
+    context.plan(auto_apply=True, no_prompts=True, restate_models=[model_name])
+    assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [
+        ("key_c", 3),
+    ]
+
+
+@time_machine.travel("2023-01-08 15:00:00 UTC")
+def test_custom_materialization(init_and_plan_context: t.Callable):
+    context, _ = init_and_plan_context("examples/sushi")
+
+    custom_insert_called = False
+
+    class CustomFullMaterialization(CustomMaterialization):
+        NAME = "test_custom_full"
+
+        def insert(
+            self,
+            table_name: str,
+            query_or_df: QueryOrDF,
+            model: Model,
+            is_first_insert: bool,
+            render_kwargs: t.Dict[str, t.Any],
+            **kwargs: t.Any,
+        ) -> None:
+            nonlocal custom_insert_called
+            custom_insert_called = True
+
+            self._replace_query_for_model(model, table_name, query_or_df, render_kwargs)
+
+    model = context.get_model("sushi.top_waiters")
+    kwargs = {
+        **model.dict(),
+        # Make a breaking change.
+        "kind": dict(name="CUSTOM", materialization="test_custom_full"),
+    }
+    context.upsert_model(SqlModel.parse_obj(kwargs))
+
+    context.plan(auto_apply=True, no_prompts=True)
+
+    assert custom_insert_called
+
+
+# Needs to be defined at the top level. If it's defined within the test body,
+# adding to the snapshot cache fails with: AttributeError: Can't pickle local object
+class TestCustomKind(CustomKind):
+    __test__ = False  # prevent pytest warning since this isn't a class containing tests
+
+    @property
+    def custom_property(self) -> str:
+        return validate_string(self.materialization_properties.get("custom_property"))
+
+
+@time_machine.travel("2023-01-08 15:00:00 UTC")
+def test_custom_materialization_with_custom_kind(init_and_plan_context: t.Callable):
+    context, _ = init_and_plan_context("examples/sushi")
+
+    custom_insert_calls = []
+
+    class CustomFullMaterialization(CustomMaterialization[TestCustomKind]):
+        NAME = "test_custom_full_with_custom_kind"
+
+        def insert(
+            self,
+            table_name: str,
+            query_or_df: QueryOrDF,
+            model: Model,
+            is_first_insert: bool,
+            render_kwargs: t.Dict[str, t.Any],
+            **kwargs: t.Any,
+        ) -> None:
+            assert isinstance(model.kind, TestCustomKind)
+
+            nonlocal custom_insert_calls
+            custom_insert_calls.append(model.kind.custom_property)
+
+            self._replace_query_for_model(model, table_name, query_or_df, render_kwargs)
+
+    model = context.get_model("sushi.top_waiters")
+    kwargs = {
+        **model.dict(),
+        # Make a breaking change.
+ "kind": dict( + name="CUSTOM", + materialization="test_custom_full_with_custom_kind", + materialization_properties={"custom_property": "pytest"}, + ), + } + context.upsert_model(SqlModel.parse_obj(kwargs)) + + context.plan(auto_apply=True) + + assert custom_insert_calls == ["pytest"] + + # no changes + context.plan(auto_apply=True) + + assert custom_insert_calls == ["pytest"] + + # change a property on the custom kind, breaking change + kwargs["kind"]["materialization_properties"]["custom_property"] = "some value" + context.upsert_model(SqlModel.parse_obj(kwargs)) + context.plan(auto_apply=True) + + assert custom_insert_calls == ["pytest", "some value"] + + +def test_incremental_time_self_reference( + mocker: MockerFixture, sushi_context: Context, sushi_data_validator: SushiDataValidator +): + start_ts = to_timestamp("1 week ago") + start_date, end_date = to_date("1 week ago"), to_date("yesterday") + if to_timestamp(start_date) < start_ts: + # The start date must be aligned by the interval unit. + start_date += timedelta(days=1) + + df = sushi_context.engine_adapter.fetchdf( + "SELECT MIN(event_date) FROM sushi.customer_revenue_lifetime" + ) + assert df.iloc[0, 0] == pd.to_datetime(start_date) + df = sushi_context.engine_adapter.fetchdf( + "SELECT MAX(event_date) FROM sushi.customer_revenue_lifetime" + ) + assert df.iloc[0, 0] == pd.to_datetime(end_date) + results = sushi_data_validator.validate("sushi.customer_revenue_lifetime", start_date, end_date) + plan = sushi_context.plan_builder( + restate_models=["sushi.customer_revenue_lifetime", "sushi.customer_revenue_by_day"], + start=start_date, + end="5 days ago", + ).build() + revenue_lifeteime_snapshot = sushi_context.get_snapshot( + "sushi.customer_revenue_lifetime", raise_if_missing=True + ) + revenue_by_day_snapshot = sushi_context.get_snapshot( + "sushi.customer_revenue_by_day", raise_if_missing=True + ) + assert sorted(plan.missing_intervals, key=lambda x: x.snapshot_id) == sorted( + [ + SnapshotIntervals( + snapshot_id=revenue_lifeteime_snapshot.snapshot_id, + intervals=[ + (to_timestamp(to_date("7 days ago")), to_timestamp(to_date("6 days ago"))), + (to_timestamp(to_date("6 days ago")), to_timestamp(to_date("5 days ago"))), + (to_timestamp(to_date("5 days ago")), to_timestamp(to_date("4 days ago"))), + (to_timestamp(to_date("4 days ago")), to_timestamp(to_date("3 days ago"))), + (to_timestamp(to_date("3 days ago")), to_timestamp(to_date("2 days ago"))), + (to_timestamp(to_date("2 days ago")), to_timestamp(to_date("1 days ago"))), + (to_timestamp(to_date("1 day ago")), to_timestamp(to_date("today"))), + ], + ), + SnapshotIntervals( + snapshot_id=revenue_by_day_snapshot.snapshot_id, + intervals=[ + (to_timestamp(to_date("7 days ago")), to_timestamp(to_date("6 days ago"))), + (to_timestamp(to_date("6 days ago")), to_timestamp(to_date("5 days ago"))), + ], + ), + ], + key=lambda x: x.snapshot_id, + ) + sushi_context.console = mocker.Mock(spec=Console) + sushi_context.apply(plan) + num_batch_calls = Counter( + [x[0][0] for x in sushi_context.console.update_snapshot_evaluation_progress.call_args_list] # type: ignore + ) + # Validate that we made 7 calls to the customer_revenue_lifetime snapshot and 1 call to the customer_revenue_by_day snapshot + assert num_batch_calls == { + sushi_context.get_snapshot("sushi.customer_revenue_lifetime", raise_if_missing=True): 7, + sushi_context.get_snapshot("sushi.customer_revenue_by_day", raise_if_missing=True): 1, + } + # Validate that the results are the same as before the restate + assert results 
== sushi_data_validator.validate( + "sushi.customer_revenue_lifetime", start_date, end_date + ) + + +def test_incremental_by_time_model_ignore_destructive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 2 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + assert updated_df["new_column"].dropna().tolist() == [3] + + with time_machine.travel("2023-01-11 00:00:00 UTC"): + updated_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + 
*,
+            2 as id,
+            CAST(4 AS STRING) as new_column,
+            @start_ds as ds
+        FROM
+            source_table;
+        """
+        (models_dir / "test_model.sql").write_text(updated_model)
+
+        context = Context(paths=[tmp_path], config=config)
+        context.plan("prod", auto_apply=True, no_prompts=True, run=True)
+
+        # Verify data loading continued to work
+        # The existing data should still be there and new data should be loaded
+        updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
+
+        assert len(updated_df) == 3
+        assert "source_id" in initial_df.columns
+        assert "id" in updated_df.columns
+        assert "ds" in updated_df.columns
+        # name is still in table since destructive was ignored
+        assert "name" in updated_df.columns
+        # new_column is added since it is additive and allowed
+        assert "new_column" in updated_df.columns
+        # The destructive change was ignored but this change is coercible and therefore we still return ints
+        assert updated_df["new_column"].dropna().tolist() == [3, 4]
+
+    with time_machine.travel("2023-01-12 00:00:00 UTC"):
+        updated_model = """
+        MODEL (
+            name test_model,
+            kind INCREMENTAL_BY_TIME_RANGE (
+                time_column ds,
+                forward_only true,
+                on_destructive_change ignore
+            ),
+            start '2023-01-01',
+            cron '@daily'
+        );
+
+        SELECT
+            *,
+            2 as id,
+            CAST(5 AS STRING) as new_column,
+            @start_ds as ds
+        FROM
+            source_table;
+        """
+        (models_dir / "test_model.sql").write_text(updated_model)
+
+        context = Context(paths=[tmp_path], config=config)
+        # Make the change compatible since that means we will attempt an alter now that it is considered additive
+        context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = {
+            exp.DataType.build("INT"): {exp.DataType.build("STRING")}
+        }
+        context.plan("prod", auto_apply=True, no_prompts=True, run=True)
+
+        # Verify data loading continued to work
+        # The existing data should still be there and new data should be loaded
+        updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
+
+        assert len(updated_df) == 4
+        assert "source_id" in initial_df.columns
+        assert "id" in updated_df.columns
+        assert "ds" in updated_df.columns
+        # name is still in table since destructive was ignored
+        assert "name" in updated_df.columns
+        # new_column is added since it is additive and allowed
+        assert "new_column" in updated_df.columns
+        # The change is now reflected since an additive alter could be performed
+        assert updated_df["new_column"].dropna().tolist() == ["3", "4", "5"]
+
+    context.close()
+
+
+def test_incremental_by_time_model_ignore_additive_change(tmp_path: Path):
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+    data_dir = tmp_path / "data"
+    data_dir.mkdir()
+    data_filepath = data_dir / "test.duckdb"
+
+    config = Config(
+        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
+        default_connection=DuckDBConnectionConfig(database=str(data_filepath)),
+    )
+
+    # Initial model with 3 columns
+    initial_model = f"""
+    MODEL (
+        name test_model,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column ds,
+            forward_only true,
+            on_destructive_change allow,
+            on_additive_change ignore
+        ),
+        start '2023-01-01',
+        cron '@daily'
+    );
+
+    SELECT
+        *,
+        1 as id,
+        'test_name' as name,
+        'other' as other_column,
+        @start_ds as ds
+    FROM
+        source_table;
+    """
+
+    # Write initial model
+    (models_dir / "test_model.sql").write_text(initial_model)
+
+    with time_machine.travel("2023-01-08 00:00:00 UTC"):
+        # Create context and apply initial model
+        context = Context(paths=[tmp_path], config=config)
+        context.engine_adapter.execute("CREATE TABLE source_table (source_id 
INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column to the source table + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'other' as other_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("ALTER TABLE source_table ADD COLUMN new_column INT") + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is removed since destructive is allowed + assert "name" not in updated_df.columns + # new_column is not added since additive is ignored + assert "new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was applied + assert "name" not in updated_df.columns + # new_column is still not added since additive is ignored + assert "new_column" not in updated_df.columns + + with time_machine.travel("2023-01-11 00:00:00 UTC"): + updated_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + CAST(1 AS STRING) as id, + 'other' as other_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(updated_model) + + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = { + exp.DataType.build("INT"): {exp.DataType.build("STRING")} + } + context.plan("prod", auto_apply=True, no_prompts=True, run=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 3 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is still not added since additive is ignored + assert "new_column" not in updated_df.columns + # The additive change was ignored 
even though we set the change as compatible, so
+        # instead of getting strings in the result we still return ints
+        assert updated_df["id"].tolist() == [1, 1, 1]
+
+    with time_machine.travel("2023-01-12 00:00:00 UTC"):
+        updated_model = """
+        MODEL (
+            name test_model,
+            kind INCREMENTAL_BY_TIME_RANGE (
+                time_column ds,
+                forward_only true,
+                on_destructive_change allow,
+                on_additive_change allow
+            ),
+            start '2023-01-01',
+            cron '@daily'
+        );
+
+        SELECT
+            *,
+            CAST(1 AS STRING) as id,
+            'other' as other_column,
+            @start_ds as ds
+        FROM
+            source_table;
+        """
+        (models_dir / "test_model.sql").write_text(updated_model)
+
+        context = Context(paths=[tmp_path], config=config)
+        # Make the change compatible since that means we will attempt an alter now that it is considered additive
+        context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = {
+            exp.DataType.build("INT"): {exp.DataType.build("STRING")}
+        }
+        context.plan("prod", auto_apply=True, no_prompts=True, run=True)
+
+        # Verify data loading continued to work
+        # The existing data should still be there and new data should be loaded
+        updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
+
+        assert len(updated_df) == 4
+        assert "source_id" in initial_df.columns
+        assert "id" in updated_df.columns
+        assert "ds" in updated_df.columns
+        # name is not still in table since destructive was allowed
+        assert "name" not in updated_df.columns
+        # new_column is now added since additive changes are now allowed
+        assert "new_column" in updated_df.columns
+        # The change is now reflected since an additive alter could be performed
+        assert updated_df["id"].dropna().tolist() == ["1", "1", "1", "1"]
+
+    context.close()
+
+
+def test_incremental_by_unique_key_model_ignore_destructive_change(tmp_path: Path):
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+    data_dir = tmp_path / "data"
+    data_dir.mkdir()
+    data_filepath = data_dir / "test.duckdb"
+
+    config = Config(
+        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
+        default_connection=DuckDBConnectionConfig(database=str(data_filepath)),
+    )
+
+    # Initial model with 3 columns
+    initial_model = f"""
+    MODEL (
+        name test_model,
+        kind INCREMENTAL_BY_UNIQUE_KEY (
+            unique_key id,
+            forward_only true,
+            on_destructive_change ignore
+        ),
+        start '2023-01-01',
+        cron '@daily'
+    );
+
+    SELECT
+        *,
+        1 as id,
+        'test_name' as name,
+        @start_ds as ds
+    FROM
+        source_table;
+    """
+
+    # Write initial model
+    (models_dir / "test_model.sql").write_text(initial_model)
+
+    with time_machine.travel("2023-01-08 00:00:00 UTC"):
+        # Create context and apply initial model
+        context = Context(paths=[tmp_path], config=config)
+        context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)")
+        context.engine_adapter.execute("INSERT INTO source_table VALUES (1)")
+
+        # Apply initial plan and load data
+        context.plan("prod", auto_apply=True, no_prompts=True)
+
+        # Verify initial data was loaded
+        initial_df = context.fetchdf('SELECT * FROM "default"."test_model"')
+        assert len(initial_df) == 1
+        assert "source_id" in initial_df.columns
+        assert "id" in initial_df.columns
+        assert "name" in initial_df.columns
+        assert "ds" in initial_df.columns
+
+        context.close()
+
+    # remove `name` column and add new column
+    initial_model = """
+    MODEL (
+        name test_model,
+        kind INCREMENTAL_BY_UNIQUE_KEY (
+            unique_key id,
+            forward_only true,
+            on_destructive_change ignore
+        ),
+        start '2023-01-01',
+        cron '@daily'
+    );
+
+    SELECT
+        *,
+        2 as id,
+        3 as new_column,
+        @start_ds as ds
+    FROM
+
source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + +def test_incremental_by_unique_key_model_ignore_additive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key id, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key id, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 2 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * 
FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still not in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + +def test_incremental_unmanaged_model_ignore_destructive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_UNMANAGED( + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_UNMANAGED( + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 2 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = 
Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + +def test_incremental_unmanaged_model_ignore_additive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_UNMANAGED( + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_UNMANAGED( + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 2 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and 
ignored + assert "new_column" not in updated_df.columns + + context.close() + + +def test_scd_type_2_by_time_ignore_destructive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_TIME ( + unique_key id, + updated_at_name ds, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_dt as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_TIME ( + unique_key id, + updated_at_name ds, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 3 as new_column, + @start_dt as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + +def test_scd_type_2_by_time_ignore_additive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + 
default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_TIME ( + unique_key id, + updated_at_name ds, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_dt as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_TIME ( + unique_key id, + updated_at_name ds, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 3 as new_column, + @start_dt as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + +def test_scd_type_2_by_column_ignore_destructive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_COLUMN ( + unique_key id, + columns [name], + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 
'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_COLUMN ( + unique_key id, + columns [new_column], + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + +def test_scd_type_2_by_column_ignore_additive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_COLUMN ( + unique_key id, + columns [stable], + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + 'stable' as stable, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id 
INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_COLUMN ( + unique_key id, + columns [stable], + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'stable2' as stable, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was ignored + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + +def test_incremental_partition_ignore_destructive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_PARTITION ( + on_destructive_change ignore + ), + partitioned_by [ds], + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in 
initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_PARTITION ( + on_destructive_change ignore + ), + partitioned_by [ds], + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + +def test_incremental_partition_ignore_additive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_PARTITION ( + on_destructive_change allow, + on_additive_change ignore + ), + partitioned_by [ds], + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_PARTITION ( + on_destructive_change allow, + on_additive_change ignore + ), + partitioned_by [ds], + start '2023-01-01', + cron '@daily' + ); + + SELECT + 
*, + 1 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + +def test_incremental_by_time_model_ignore_destructive_change_unit_test(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + test_dir = tmp_path / "tests" + test_dir.mkdir() + test_filepath = test_dir / "test_test_model.yaml" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + id, + name, + ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + initial_test = f""" + +test_test_model: + model: test_model + inputs: + source_table: + - id: 1 + name: 'test_name' + ds: '2025-01-01' + outputs: + query: + - id: 1 + name: 'test_name' + ds: '2025-01-01' +""" + + # Write initial test + test_filepath.write_text(initial_test) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute( + "CREATE TABLE source_table (id INT, name STRING, new_column INT, ds STRING)" + ) + context.engine_adapter.execute( + "INSERT INTO source_table VALUES (1, 'test_name', NULL, '2023-01-01')" + ) + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + test_result = context.test() + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + assert len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + 
kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + id, + new_column, + ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + updated_test = f""" + + test_test_model: + model: test_model + inputs: + source_table: + - id: 1 + new_column: 3 + ds: '2025-01-01' + outputs: + query: + - id: 1 + new_column: 3 + ds: '2025-01-01' + """ + + # Write initial test + test_filepath.write_text(updated_test) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + test_result = context.test() + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 1 + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + assert len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("INSERT INTO source_table VALUES (2, NULL, 3, '2023-01-09')") + context.run() + test_result = context.test() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + assert len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + +def test_incremental_by_time_model_ignore_additive_change_unit_test(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + test_dir = tmp_path / "tests" + test_dir.mkdir() + test_filepath = test_dir / "test_test_model.yaml" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + id, + name, + ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + initial_test = f""" + +test_test_model: + model: test_model + inputs: + source_table: + - id: 1 + name: 'test_name' + ds: '2025-01-01' + outputs: + query: + - id: 1 + name: 'test_name' + ds: '2025-01-01' +""" + + # Write initial test + test_filepath.write_text(initial_test) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute( + "CREATE TABLE source_table (id INT, name STRING, new_column INT, ds STRING)" + ) + context.engine_adapter.execute( + 
"INSERT INTO source_table VALUES (1, 'test_name', NULL, '2023-01-01')" + ) + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + test_result = context.test() + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + assert len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + id, + new_column, + ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + # `new_column` is in the output since unit tests are based on the model definition that currently + # exists and doesn't take into account the historical changes to the table. Therefore `new_column` is + # not actually in the table but it is represented in the test + updated_test = f""" + test_test_model: + model: test_model + inputs: + source_table: + - id: 1 + new_column: 3 + ds: '2025-01-01' + outputs: + query: + - id: 1 + new_column: 3 + ds: '2025-01-01' + """ + + # Write initial test + test_filepath.write_text(updated_test) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + test_result = context.test() + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 1 + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not in table since destructive was ignored + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + assert len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("INSERT INTO source_table VALUES (2, NULL, 3, '2023-01-09')") + context.run() + test_result = context.test() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still not in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + assert len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + +@time_machine.travel("2020-01-01 00:00:00 UTC") +def test_scd_type_2_full_restatement_no_start_date(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Initial product catalog of 3 products + raw_products = d.parse(""" + MODEL ( + name memory.store.raw_products, + kind FULL + ); + + SELECT * FROM VALUES + (101, 'Laptop Pro', 1299.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP), + (102, 'Wireless Mouse', 
49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP),
+        (103, 'Office Chair', 199.99, 'Furniture', '2020-01-01 00:00:00'::TIMESTAMP)
+    AS t(product_id, product_name, price, category, last_updated);
+    """)
+
+    # SCD Type 2 model for product history tracking
+    product_history = d.parse("""
+    MODEL (
+        name memory.store.product_history,
+        kind SCD_TYPE_2_BY_TIME (
+            unique_key product_id,
+            updated_at_name last_updated,
+            disable_restatement false
+        ),
+        owner catalog_team,
+        cron '0 */6 * * *',
+        grain product_id,
+        description 'Product catalog change history'
+    );
+
+    SELECT
+        product_id::INT AS product_id,
+        product_name::TEXT AS product_name,
+        price::DECIMAL(10,2) AS price,
+        category::TEXT AS category,
+        last_updated AS last_updated
+    FROM
+        memory.store.raw_products;
+    """)
+
+    raw_products_model = load_sql_based_model(raw_products)
+    product_history_model = load_sql_based_model(product_history)
+    context.upsert_model(raw_products_model)
+    context.upsert_model(product_history_model)
+
+    # Initial plan and apply
+    plan = context.plan_builder("prod", skip_tests=True).build()
+    context.apply(plan)
+
+    query = "SELECT product_id, product_name, price, category, last_updated, valid_from, valid_to FROM memory.store.product_history ORDER BY product_id, valid_from"
+    initial_data = context.engine_adapter.fetchdf(query)
+
+    # Validate initial state: 3 products, all active
+    assert len(initial_data) == 3
+    assert initial_data["valid_to"].isna().all()
+    initial_product_names = set(initial_data["product_name"].tolist())
+    assert initial_product_names == {"Laptop Pro", "Wireless Mouse", "Office Chair"}
+
+    # Price update and category change
+    with time_machine.travel("2020-01-15 12:00:00 UTC"):
+        raw_products_v2 = d.parse("""
+        MODEL (
+            name memory.store.raw_products,
+            kind FULL
+        );
+
+        SELECT * FROM VALUES
+            (101, 'Laptop Pro', 1199.99, 'Electronics', '2020-01-15 00:00:00'::TIMESTAMP),
+            (102, 'Wireless Mouse', 49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP),
+            (103, 'Ergonomic Office Chair', 229.99, 'Office Furniture', '2020-01-15 00:00:00'::TIMESTAMP)
+        AS t(product_id, product_name, price, category, last_updated);
+        """)
+        raw_products_v2_model = load_sql_based_model(raw_products_v2)
+        context.upsert_model(raw_products_v2_model)
+        context.plan(
+            auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full()
+        )
+        context.run()
+
+        data_after_first_change = context.engine_adapter.fetchdf(query)
+
+        # Should have 5 records (2 originals closed, 2 new active, 1 unchanged)
+        assert len(data_after_first_change) == 5
+
+    # Second change
+    with time_machine.travel("2020-02-01 10:00:00 UTC"):
+        raw_products_v3 = d.parse("""
+        MODEL (
+            name memory.store.raw_products,
+            kind FULL
+        );
+
+        SELECT * FROM VALUES
+            (101, 'Laptop Pro Max', 1399.99, 'Electronics', '2020-02-01 00:00:00'::TIMESTAMP),
+            (103, 'Ergonomic Office Chair', 229.99, 'Office Furniture', '2020-01-15 00:00:00'::TIMESTAMP),
+            (102, 'Wireless Mouse', 49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP)
+        AS t(product_id, product_name, price, category, last_updated);
+        """)
+        raw_products_v3_model = load_sql_based_model(raw_products_v3)
+        context.upsert_model(raw_products_v3_model)
+        context.plan(
+            auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full()
+        )
+        context.run()
+        data_after_second_change = context.engine_adapter.fetchdf(query)
+        assert len(data_after_second_change) == 6
+
+    # Store the current state before full restatement
+    data_before_full_restatement = 
data_after_second_change.copy() + + # Perform full restatement (no start date provided) + with time_machine.travel("2020-02-01 15:00:00 UTC"): + plan = context.plan_builder( + "prod", skip_tests=True, restate_models=["memory.store.product_history"] + ).build() + context.apply(plan) + data_after_full_restatement = context.engine_adapter.fetchdf(query) + assert len(data_after_full_restatement) == 3 + + # Check that all currently active products before restatement are still active after restatement + active_before = data_before_full_restatement[ + data_before_full_restatement["valid_to"].isna() + ] + active_after = data_after_full_restatement + assert set(active_before["product_id"]) == set(active_after["product_id"]) + + expected_products = { + 101: { + "product_name": "Laptop Pro Max", + "price": 1399.99, + "category": "Electronics", + "last_updated": "2020-02-01", + }, + 102: { + "product_name": "Wireless Mouse", + "price": 49.99, + "category": "Electronics", + "last_updated": "2020-01-01", + }, + 103: { + "product_name": "Ergonomic Office Chair", + "price": 229.99, + "category": "Office Furniture", + "last_updated": "2020-01-15", + }, + } + for _, row in data_after_full_restatement.iterrows(): + pid = row["product_id"] + assert pid in expected_products + expected = expected_products[pid] + assert row["product_name"] == expected["product_name"] + assert float(row["price"]) == expected["price"] + assert row["category"] == expected["category"] + + # valid_from should be the epoch, valid_to should be NaT + assert str(row["valid_from"]) == "1970-01-01 00:00:00" + assert pd.isna(row["valid_to"]) + + +def test_plan_evaluator_correlation_id(tmp_path: Path): + def _correlation_id_in_sqls(correlation_id: CorrelationId, mock_logger): + sqls = [call[0][0] for call in mock_logger.call_args_list] + return any(f"/* {correlation_id} */" in sql for sql in sqls) + + ctx = Context(paths=[tmp_path], config=Config()) + + # Case: Ensure that the correlation id (plan_id) is included in the SQL for each plan + for i in range(2): + create_temp_file( + tmp_path, + Path("models", "test.sql"), + f"MODEL (name test.a, kind FULL); SELECT {i} AS col", + ) + + with mock.patch("sqlmesh.core.engine_adapter.base.EngineAdapter._log_sql") as mock_logger: + ctx.load() + plan = ctx.plan(auto_apply=True, no_prompts=True) + + correlation_id = CorrelationId.from_plan_id(plan.plan_id) + assert str(correlation_id) == f"SQLMESH_PLAN: {plan.plan_id}" + + assert _correlation_id_in_sqls(correlation_id, mock_logger) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_scd_type_2_regular_run_with_offset(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + raw_employee_status = d.parse(""" + MODEL ( + name memory.hr_system.raw_employee_status, + kind FULL + ); + + SELECT + 1001 AS employee_id, + 'engineering' AS department, + 'EMEA' AS region, + '2023-01-08 15:00:00 UTC' AS last_modified; + """) + + employee_history = d.parse(""" + MODEL ( + name memory.hr_system.employee_history, + kind SCD_TYPE_2_BY_TIME ( + unique_key employee_id, + updated_at_name last_modified, + disable_restatement false + ), + owner hr_analytics, + cron '0 7 * * *', + grain employee_id, + description 'Historical tracking of employee status changes' + ); + + SELECT + employee_id::INT AS employee_id, + department::TEXT AS department, + region::TEXT AS region, + last_modified AS last_modified + FROM + memory.hr_system.raw_employee_status; + """) + + raw_employee_status_model = 
load_sql_based_model(raw_employee_status) + employee_history_model = load_sql_based_model(employee_history) + context.upsert_model(raw_employee_status_model) + context.upsert_model(employee_history_model) + + # Initial plan and apply + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + query = "SELECT employee_id, department, region, valid_from, valid_to FROM memory.hr_system.employee_history ORDER BY employee_id, valid_from" + initial_data = context.engine_adapter.fetchdf(query) + + assert len(initial_data) == 1 + assert initial_data["valid_to"].isna().all() + assert initial_data["department"].tolist() == ["engineering"] + assert initial_data["region"].tolist() == ["EMEA"] + + # Apply a future plan with source changes a few hours before the cron time of the SCD Type 2 model BUT on the same day + with time_machine.travel("2023-01-09 00:10:00 UTC"): + raw_employee_status_v2 = d.parse(""" + MODEL ( + name memory.hr_system.raw_employee_status, + kind FULL + ); + + SELECT + 1001 AS employee_id, + 'engineering' AS department, + 'AMER' AS region, + '2023-01-09 00:10:00 UTC' AS last_modified; + """) + raw_employee_status_v2_model = load_sql_based_model(raw_employee_status_v2) + context.upsert_model(raw_employee_status_v2_model) + context.plan( + auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() + ) + + # The 7th hour of the day the run is kicked off for the SCD Type 2 model + with time_machine.travel("2023-01-09 07:00:01 UTC"): + context.run() + data_after_change = context.engine_adapter.fetchdf(query) + + # Validate the SCD2 records for employee 1001 + assert len(data_after_change) == 2 + assert data_after_change.iloc[0]["employee_id"] == 1001 + assert data_after_change.iloc[0]["department"] == "engineering" + assert data_after_change.iloc[0]["region"] == "EMEA" + assert str(data_after_change.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" + assert str(data_after_change.iloc[0]["valid_to"]) == "2023-01-09 00:10:00" + assert data_after_change.iloc[1]["employee_id"] == 1001 + assert data_after_change.iloc[1]["department"] == "engineering" + assert data_after_change.iloc[1]["region"] == "AMER" + assert str(data_after_change.iloc[1]["valid_from"]) == "2023-01-09 00:10:00" + assert pd.isna(data_after_change.iloc[1]["valid_to"]) + + # Update source model again a bit later on the same day + raw_employee_status_v2 = d.parse(""" + MODEL ( + name memory.hr_system.raw_employee_status, + kind FULL + ); + + SELECT + 1001 AS employee_id, + 'sales' AS department, + 'ANZ' AS region, + '2023-01-09 07:26:00 UTC' AS last_modified; + """) + raw_employee_status_v2_model = load_sql_based_model(raw_employee_status_v2) + context.upsert_model(raw_employee_status_v2_model) + context.plan( + auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() + ) + + # A day later the run is kicked off for the SCD Type 2 model again + with time_machine.travel("2023-01-10 07:00:00 UTC"): + context.run() + data_after_change = context.engine_adapter.fetchdf(query) + + # Validate the SCD2 history for employee 1001 after second change with the historical records intact + assert len(data_after_change) == 3 + assert data_after_change.iloc[0]["employee_id"] == 1001 + assert data_after_change.iloc[0]["department"] == "engineering" + assert data_after_change.iloc[0]["region"] == "EMEA" + assert str(data_after_change.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" + assert str(data_after_change.iloc[0]["valid_to"]) == "2023-01-09 00:10:00" + assert 
data_after_change.iloc[1]["employee_id"] == 1001 + assert data_after_change.iloc[1]["department"] == "engineering" + assert data_after_change.iloc[1]["region"] == "AMER" + assert str(data_after_change.iloc[1]["valid_from"]) == "2023-01-09 00:10:00" + assert str(data_after_change.iloc[1]["valid_to"]) == "2023-01-09 07:26:00" + assert data_after_change.iloc[2]["employee_id"] == 1001 + assert data_after_change.iloc[2]["department"] == "sales" + assert data_after_change.iloc[2]["region"] == "ANZ" + assert str(data_after_change.iloc[2]["valid_from"]) == "2023-01-09 07:26:00" + assert pd.isna(data_after_change.iloc[2]["valid_to"]) + + # Now test restatement works (full restatement support currently) + with time_machine.travel("2023-01-10 07:38:00 UTC"): + plan = context.plan_builder( + "prod", + skip_tests=True, + restate_models=["memory.hr_system.employee_history"], + start="2023-01-09 00:10:00", + ).build() + context.apply(plan) + restated_data = context.engine_adapter.fetchdf(query) + + # Validate the SCD2 history after restatement has been wiped bar one + assert len(restated_data) == 1 + assert restated_data.iloc[0]["employee_id"] == 1001 + assert restated_data.iloc[0]["department"] == "sales" + assert restated_data.iloc[0]["region"] == "ANZ" + assert str(restated_data.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" + assert pd.isna(restated_data.iloc[0]["valid_to"]) + + +def test_seed_model_metadata_update_does_not_trigger_backfill(tmp_path: Path): + """ + Scenario: + - Create a seed model; perform initial population + - Modify the model with a metadata-only change and trigger a plan + + Outcome: + - The seed model is modified (metadata-only) but this should NOT trigger backfill + - There should be no missing_intervals on the plan to backfill + """ + + models_path = tmp_path / "models" + seeds_path = tmp_path / "seeds" + models_path.mkdir() + seeds_path.mkdir() + + seed_model_path = models_path / "seed.sql" + seed_path = seeds_path / "seed_data.csv" + + seed_path.write_text("\n".join(["id,name", "1,test"])) + + seed_model_path.write_text(""" + MODEL ( + name test.source_data, + kind SEED ( + path '../seeds/seed_data.csv' + ) + ); + """) + + config = Config( + gateways={"": GatewayConfig(connection=DuckDBConnectionConfig())}, + model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"), + ) + ctx = Context(paths=tmp_path, config=config) + + plan = ctx.plan(auto_apply=True) + + original_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"'] + assert plan.directly_modified == {original_seed_snapshot.snapshot_id} + assert plan.metadata_updated == set() + assert plan.missing_intervals + + # prove data loaded + assert ctx.engine_adapter.fetchall("select id, name from memory.test.source_data") == [ + (1, "test") + ] + + # prove no diff + ctx.load() + plan = ctx.plan(auto_apply=True) + assert not plan.has_changes + assert not plan.missing_intervals + + # make metadata-only change + seed_model_path.write_text(""" + MODEL ( + name test.source_data, + kind SEED ( + path '../seeds/seed_data.csv' + ), + description 'updated by test' + ); + """) + + ctx.load() + plan = ctx.plan(auto_apply=True) + assert plan.has_changes + + new_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"'] + assert ( + new_seed_snapshot.version == original_seed_snapshot.version + ) # should be using the same physical table + assert ( + new_seed_snapshot.snapshot_id != original_seed_snapshot.snapshot_id + ) # but still be different due to the metadata change + assert plan.directly_modified == set() + 
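+    # Why the change shows up under metadata_updated (asserted below) rather than directly_modified: a metadata-only edit such as a description changes the snapshot's metadata fingerprint but, assuming SQLMesh's usual split between data-affecting and metadata-only fields, leaves the data fingerprint intact, so a new snapshot_id is created while the previous version and physical table are reused.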
assert plan.metadata_updated == {new_seed_snapshot.snapshot_id} + + # there should be no missing intervals to backfill since all we did is update a description + assert not plan.missing_intervals + + # there should still be no diff or missing intervals in 3 days time + assert new_seed_snapshot.model.interval_unit.is_day + with time_machine.travel(timedelta(days=3)): + ctx.clear_caches() + ctx.load() + plan = ctx.plan(auto_apply=True) + assert not plan.has_changes + assert not plan.missing_intervals + + # change seed data + seed_path.write_text("\n".join(["id,name", "1,test", "2,updated"])) + + # new plan - NOW we should backfill because data changed + ctx.load() + plan = ctx.plan(auto_apply=True) + assert plan.has_changes + + updated_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"'] + + assert ( + updated_seed_snapshot.snapshot_id + != new_seed_snapshot.snapshot_id + != original_seed_snapshot.snapshot_id + ) + assert not updated_seed_snapshot.forward_only + assert plan.directly_modified == {updated_seed_snapshot.snapshot_id} + assert plan.metadata_updated == set() + assert plan.missing_intervals + + # prove backfilled data loaded + assert ctx.engine_adapter.fetchall("select id, name from memory.test.source_data") == [ + (1, "test"), + (2, "updated"), + ] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_seed_model_promote_to_prod_after_dev( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + with open(context.path / "seeds" / "waiter_names.csv", "a") as f: + f.write("\n10,New Waiter") + + context.load() + + waiter_names_snapshot = context.get_snapshot("sushi.waiter_names") + plan = context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True) + assert waiter_names_snapshot.snapshot_id in plan.directly_modified + + # Trigger a metadata change to reuse the previous version + waiter_names_model = waiter_names_snapshot.model.copy( + update={"description": "Updated description"} + ) + context.upsert_model(waiter_names_model) + context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True) + + # Promote all changes to prod + waiter_names_snapshot = context.get_snapshot("sushi.waiter_names") + plan = context.plan_builder("prod", skip_tests=True).build() + # Clear the cache to source the dehydrated model instance from the state + context.clear_caches() + context.apply(plan) + + assert ( + context.engine_adapter.fetchone("SELECT COUNT(*) FROM sushi.waiter_names WHERE id = 10")[0] + == 1 + ) diff --git a/tests/core/integration/test_multi_repo.py b/tests/core/integration/test_multi_repo.py new file mode 100644 index 0000000000..6477b08741 --- /dev/null +++ b/tests/core/integration/test_multi_repo.py @@ -0,0 +1,456 @@ +from __future__ import annotations + +from unittest.mock import patch +from textwrap import dedent +import os +import pytest +from pathlib import Path +from sqlmesh.core.console import ( + get_console, +) +from sqlmesh.core.config.naming import NameInferenceConfig +from sqlmesh.core.model.common import ParsableSql +from sqlmesh.utils.concurrency import NodeExecutionFailedError + +from sqlmesh.core import constants as c +from sqlmesh.core.config import ( + Config, + GatewayConfig, + ModelDefaultsConfig, + DuckDBConnectionConfig, +) +from sqlmesh.core.console import get_console +from sqlmesh.core.context import Context +from sqlmesh.utils.date import now +from tests.conftest import DuckDBMetadata +from tests.utils.test_helpers import use_terminal_console +from 
tests.core.integration.utils import validate_apply_basics + + +pytestmark = pytest.mark.slow + + +@use_terminal_console +def test_multi(mocker): + context = Context(paths=["examples/multi/repo_1", "examples/multi/repo_2"], gateway="memory") + + with patch.object(get_console(), "log_warning") as mock_logger: + context.plan_builder(environment="dev") + warnings = mock_logger.call_args[0][0] + repo1_path, repo2_path = context.configs.keys() + assert f"Linter warnings for {repo1_path}" in warnings + assert f"Linter warnings for {repo2_path}" not in warnings + + assert ( + context.render("bronze.a").sql() + == '''SELECT 1 AS "col_a", 'b' AS "col_b", 1 AS "one", 'repo_1' AS "dup"''' + ) + assert ( + context.render("silver.d").sql() + == '''SELECT "c"."col_a" AS "col_a", 2 AS "two", 'repo_2' AS "dup" FROM "memory"."silver"."c" AS "c"''' + ) + context._new_state_sync().reset(default_catalog=context.default_catalog) + plan = context.plan_builder().build() + assert len(plan.new_snapshots) == 5 + context.apply(plan) + + # Ensure before_all, after_all statements for multiple repos have executed + environment_statements = context.state_reader.get_environment_statements(c.PROD) + assert len(environment_statements) == 2 + assert context.fetchdf("select * from before_1").to_dict()["1"][0] == 1 + assert context.fetchdf("select * from before_2").to_dict()["2"][0] == 2 + assert context.fetchdf("select * from after_1").to_dict()["repo_1"][0] == "repo_1" + assert context.fetchdf("select * from after_2").to_dict()["repo_2"][0] == "repo_2" + + old_context = context + context = Context( + paths=["examples/multi/repo_1"], + state_sync=old_context.state_sync, + gateway="memory", + ) + context._engine_adapter = old_context.engine_adapter + del context.engine_adapters + + model = context.get_model("bronze.a") + assert model.project == "repo_1" + context.upsert_model( + model.copy( + update={ + "query_": ParsableSql(sql=model.query.select("'c' AS c").sql(dialect=model.dialect)) + } + ) + ) + plan = context.plan_builder().build() + + assert set(snapshot.name for snapshot in plan.directly_modified) == { + '"memory"."bronze"."a"', + '"memory"."bronze"."b"', + '"memory"."silver"."e"', + } + assert sorted([x.name for x in list(plan.indirectly_modified.values())[0]]) == [ + '"memory"."silver"."c"', + '"memory"."silver"."d"', + ] + assert len(plan.missing_intervals) == 3 + context.apply(plan) + validate_apply_basics(context, c.PROD, plan.snapshots.values()) + + # Ensure that before_all and after_all statements of both repos are there despite planning with repo_1 + environment_statements = context.state_reader.get_environment_statements(c.PROD) + assert len(environment_statements) == 2 + + # Ensure that environment statements have the project field set correctly + sorted_env_statements = sorted(environment_statements, key=lambda es: es.project) + assert sorted_env_statements[0].project == "repo_1" + assert sorted_env_statements[1].project == "repo_2" + + # Assert before_all and after_all for each project + assert sorted_env_statements[0].before_all == [ + "CREATE TABLE IF NOT EXISTS before_1 AS select @one()" + ] + assert sorted_env_statements[0].after_all == [ + "CREATE TABLE IF NOT EXISTS after_1 AS select @dup()" + ] + assert sorted_env_statements[1].before_all == [ + "CREATE TABLE IF NOT EXISTS before_2 AS select @two()" + ] + assert sorted_env_statements[1].after_all == [ + "CREATE TABLE IF NOT EXISTS after_2 AS select @dup()" + ] + + +@use_terminal_console +def 
test_multi_repo_single_project_environment_statements_update(copy_to_temp_path): + paths = copy_to_temp_path("examples/multi") + repo_1_path = f"{paths[0]}/repo_1" + repo_2_path = f"{paths[0]}/repo_2" + + context = Context(paths=[repo_1_path, repo_2_path], gateway="memory") + context._new_state_sync().reset(default_catalog=context.default_catalog) + + initial_plan = context.plan_builder().build() + context.apply(initial_plan) + + # Get initial statements + initial_statements = context.state_reader.get_environment_statements(c.PROD) + assert len(initial_statements) == 2 + + # Modify repo_1's config to add a new before_all statement + repo_1_config_path = f"{repo_1_path}/config.yaml" + with open(repo_1_config_path, "r") as f: + config_content = f.read() + + # Add a new before_all statement to repo_1 only + modified_config = config_content.replace( + "CREATE TABLE IF NOT EXISTS before_1 AS select @one()", + "CREATE TABLE IF NOT EXISTS before_1 AS select @one()\n - CREATE TABLE IF NOT EXISTS before_1_modified AS select 999", + ) + + with open(repo_1_config_path, "w") as f: + f.write(modified_config) + + # Create new context with modified config but only for repo_1 + context_repo_1_only = Context( + paths=[repo_1_path], state_sync=context.state_sync, gateway="memory" + ) + + # Plan with only repo_1, this should preserve repo_2's statements from state + repo_1_plan = context_repo_1_only.plan_builder(environment="dev").build() + context_repo_1_only.apply(repo_1_plan) + updated_statements = context_repo_1_only.state_reader.get_environment_statements("dev") + + # Should still have statements from both projects + assert len(updated_statements) == 2 + + # Sort by project + sorted_updated = sorted(updated_statements, key=lambda es: es.project or "") + + # Verify repo_1 has the new statement + repo_1_updated = sorted_updated[0] + assert repo_1_updated.project == "repo_1" + assert len(repo_1_updated.before_all) == 2 + assert "CREATE TABLE IF NOT EXISTS before_1_modified" in repo_1_updated.before_all[1] + + # Verify repo_2 statements are preserved from state + repo_2_preserved = sorted_updated[1] + assert repo_2_preserved.project == "repo_2" + assert len(repo_2_preserved.before_all) == 1 + assert "CREATE TABLE IF NOT EXISTS before_2" in repo_2_preserved.before_all[0] + assert "CREATE TABLE IF NOT EXISTS after_2 AS select @dup()" in repo_2_preserved.after_all[0] + + +@use_terminal_console +def test_multi_virtual_layer(copy_to_temp_path): + paths = copy_to_temp_path("tests/fixtures/multi_virtual_layer") + path = Path(paths[0]) + first_db_path = str(path / "db_1.db") + second_db_path = str(path / "db_2.db") + + config = Config( + gateways={ + "first": GatewayConfig( + connection=DuckDBConnectionConfig(database=first_db_path), + variables={"overriden_var": "gateway_1"}, + ), + "second": GatewayConfig( + connection=DuckDBConnectionConfig(database=second_db_path), + variables={"overriden_var": "gateway_2"}, + ), + }, + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + model_naming=NameInferenceConfig(infer_names=True), + default_gateway="first", + gateway_managed_virtual_layer=True, + variables={"overriden_var": "global", "global_one": 88}, + ) + + context = Context(paths=paths, config=config) + assert context.default_catalog_per_gateway == {"first": "db_1", "second": "db_2"} + assert len(context.engine_adapters) == 2 + + # For the model without a gateway, the default should be used and the gateway variable should override the global + assert ( + context.render("first_schema.model_one").sql() + == 'SELECT 
\'gateway_1\' AS "item_id", 88 AS "global_one", 1 AS "macro_one"' + ) + + # For the model with a gateway specified, the appropriate variable should be used to override + assert ( + context.render("db_2.second_schema.model_one").sql() + == 'SELECT \'gateway_2\' AS "item_id", 88 AS "global_one", 1 AS "macro_one"' + ) + + plan = context.plan_builder().build() + assert len(plan.new_snapshots) == 4 + context.apply(plan) + + # Validate that the tables sourcing from the first tables are also correct, using evaluate + assert ( + context.evaluate( + "first_schema.model_two", start=now(), end=now(), execution_time=now() + ).to_string() + == " item_id global_one\n0 gateway_1 88" + ) + assert ( + context.evaluate( + "db_2.second_schema.model_two", start=now(), end=now(), execution_time=now() + ).to_string() + == " item_id global_one\n0 gateway_2 88" + ) + + assert sorted(set(snapshot.name for snapshot in plan.directly_modified)) == [ + '"db_1"."first_schema"."model_one"', + '"db_1"."first_schema"."model_two"', + '"db_2"."second_schema"."model_one"', + '"db_2"."second_schema"."model_two"', + ] + + model = context.get_model("db_1.first_schema.model_one") + + context.upsert_model( + model.copy( + update={ + "query_": ParsableSql( + sql=model.query.select("'c' AS extra").sql(dialect=model.dialect) + ) + } + ) + ) + plan = context.plan_builder().build() + context.apply(plan) + + state_environments = context.state_reader.get_environments() + state_snapshots = context.state_reader.get_snapshots(context.snapshots.values()) + + assert state_environments[0].gateway_managed + assert len(state_snapshots) == len(state_environments[0].snapshots) + assert [snapshot.name for snapshot in plan.directly_modified] == [ + '"db_1"."first_schema"."model_one"' + ] + assert [x.name for x in list(plan.indirectly_modified.values())[0]] == [ + '"db_1"."first_schema"."model_two"' + ] + + assert len(plan.missing_intervals) == 1 + assert ( + context.evaluate( + "db_1.first_schema.model_one", start=now(), end=now(), execution_time=now() + ).to_string() + == " item_id global_one macro_one extra\n0 gateway_1 88 1 c" + ) + + # Create dev environment with changed models + model = context.get_model("db_2.second_schema.model_one") + context.upsert_model( + model.copy( + update={ + "query_": ParsableSql( + sql=model.query.select("'d' AS extra").sql(dialect=model.dialect) + ) + } + ) + ) + model = context.get_model("first_schema.model_two") + context.upsert_model( + model.copy( + update={ + "query_": ParsableSql( + sql=model.query.select("'d2' AS col").sql(dialect=model.dialect) + ) + } + ) + ) + plan = context.plan_builder("dev").build() + context.apply(plan) + + dev_environment = context.state_sync.get_environment("dev") + assert dev_environment is not None + + metadata_engine_1 = DuckDBMetadata.from_context(context) + start_schemas_1 = set(metadata_engine_1.schemas) + assert sorted(start_schemas_1) == sorted( + {"first_schema__dev", "sqlmesh", "first_schema", "sqlmesh__first_schema"} + ) + + metadata_engine_2 = DuckDBMetadata(context._get_engine_adapter("second")) + start_schemas_2 = set(metadata_engine_2.schemas) + assert sorted(start_schemas_2) == sorted( + {"sqlmesh__second_schema", "second_schema", "second_schema__dev"} + ) + + # Invalidate dev environment + context.invalidate_environment("dev") + invalidate_environment = context.state_sync.get_environment("dev") + assert invalidate_environment is not None + assert invalidate_environment.expiration_ts < dev_environment.expiration_ts # type: ignore + assert sorted(start_schemas_1) == 
sorted(set(metadata_engine_1.schemas)) + assert sorted(start_schemas_2) == sorted(set(metadata_engine_2.schemas)) + + # Run janitor + context._run_janitor() + assert context.state_sync.get_environment("dev") is None + removed_schemas = start_schemas_1 - set(metadata_engine_1.schemas) + assert removed_schemas == {"first_schema__dev"} + removed_schemas = start_schemas_2 - set(metadata_engine_2.schemas) + assert removed_schemas == {"second_schema__dev"} + prod_environment = context.state_sync.get_environment("prod") + + # Remove the second gateway's second model and apply plan + second_model = path / "models/second_schema/model_two.sql" + os.remove(second_model) + assert not second_model.exists() + context = Context(paths=paths, config=config) + plan = context.plan_builder().build() + context.apply(plan) + prod_environment = context.state_sync.get_environment("prod") + assert len(prod_environment.snapshots_) == 3 + + # Changing the flag should show a diff + context.config.gateway_managed_virtual_layer = False + plan = context.plan_builder().build() + assert not plan.requires_backfill + assert ( + plan.context_diff.previous_gateway_managed_virtual_layer + != plan.context_diff.gateway_managed_virtual_layer + ) + assert plan.context_diff.has_changes + + # This should error since the default_gateway won't have access to create the view on a non-shared catalog + with pytest.raises(NodeExecutionFailedError, match=r"Execution failed for node SnapshotId*"): + context.apply(plan) + + +def test_multi_dbt(mocker): + context = Context(paths=["examples/multi_dbt/bronze", "examples/multi_dbt/silver"]) + context._new_state_sync().reset(default_catalog=context.default_catalog) + plan = context.plan_builder().build() + assert len(plan.new_snapshots) == 4 + context.apply(plan) + validate_apply_basics(context, c.PROD, plan.snapshots.values()) + + environment_statements = context.state_sync.get_environment_statements(c.PROD) + assert len(environment_statements) == 2 + bronze_statements = environment_statements[0] + assert bronze_statements.before_all == [ + "JINJA_STATEMENT_BEGIN;\nCREATE TABLE IF NOT EXISTS analytic_stats (physical_table VARCHAR, evaluation_time VARCHAR);\nJINJA_END;" + ] + assert not bronze_statements.after_all + silver_statements = environment_statements[1] + assert not silver_statements.before_all + assert silver_statements.after_all == [ + "JINJA_STATEMENT_BEGIN;\n{{ store_schemas(schemas) }}\nJINJA_END;" + ] + assert "store_schemas" in silver_statements.jinja_macros.root_macros + analytics_table = context.fetchdf("select * from analytic_stats;") + assert sorted(analytics_table.columns) == sorted(["physical_table", "evaluation_time"]) + schema_table = context.fetchdf("select * from schema_table;") + assert sorted(schema_table.all_schemas[0]) == sorted(["bronze", "silver"]) + + +def test_multi_hybrid(mocker): + context = Context( + paths=["examples/multi_hybrid/dbt_repo", "examples/multi_hybrid/sqlmesh_repo"] + ) + context._new_state_sync().reset(default_catalog=context.default_catalog) + plan = context.plan_builder().build() + + assert len(plan.new_snapshots) == 5 + assert context.dag.roots == {'"memory"."dbt_repo"."e"'} + assert context.dag.graph['"memory"."dbt_repo"."c"'] == {'"memory"."sqlmesh_repo"."b"'} + assert context.dag.graph['"memory"."sqlmesh_repo"."b"'] == {'"memory"."sqlmesh_repo"."a"'} + assert context.dag.graph['"memory"."sqlmesh_repo"."a"'] == {'"memory"."dbt_repo"."e"'} + assert context.dag.downstream('"memory"."dbt_repo"."e"') == [ + '"memory"."sqlmesh_repo"."a"', + 
'"memory"."sqlmesh_repo"."b"', + '"memory"."dbt_repo"."c"', + '"memory"."dbt_repo"."d"', + ] + + sqlmesh_model_a = context.get_model("sqlmesh_repo.a") + dbt_model_c = context.get_model("dbt_repo.c") + assert sqlmesh_model_a.project == "sqlmesh_repo" + + sqlmesh_rendered = ( + 'SELECT "e"."col_a" AS "col_a", "e"."col_b" AS "col_b" FROM "memory"."dbt_repo"."e" AS "e"' + ) + dbt_rendered = 'SELECT DISTINCT ROUND(CAST(("b"."col_a" / NULLIF(100, 0)) AS DECIMAL(16, 2)), 2) AS "rounded_col_a" FROM "memory"."sqlmesh_repo"."b" AS "b"' + assert sqlmesh_model_a.render_query().sql() == sqlmesh_rendered + assert dbt_model_c.render_query().sql() == dbt_rendered + + context.apply(plan) + validate_apply_basics(context, c.PROD, plan.snapshots.values()) + + +def test_engine_adapters_multi_repo_all_gateways_gathered(copy_to_temp_path): + paths = copy_to_temp_path("examples/multi") + repo_1_path = paths[0] / "repo_1" + repo_2_path = paths[0] / "repo_2" + + # Add an extra gateway to repo_2's config + repo_2_config_path = repo_2_path / "config.yaml" + config_content = repo_2_config_path.read_text() + + modified_config = config_content.replace( + "default_gateway: local", + dedent(""" + extra: + connection: + type: duckdb + database: extra.duckdb + + default_gateway: local + """), + ) + + repo_2_config_path.write_text(modified_config) + + # Create context with both repos but using the repo_1 path first + context = Context( + paths=(repo_1_path, repo_2_path), + gateway="memory", + ) + + # Verify all gateways from both repos are present + gathered_gateways = context.engine_adapters.keys() + expected_gateways = {"local", "memory", "extra"} + assert gathered_gateways == expected_gateways diff --git a/tests/core/integration/test_plan_options.py b/tests/core/integration/test_plan_options.py new file mode 100644 index 0000000000..52cd215cc5 --- /dev/null +++ b/tests/core/integration/test_plan_options.py @@ -0,0 +1,478 @@ +from __future__ import annotations + +import typing as t +import pytest +from sqlmesh.core.console import ( + set_console, + get_console, + TerminalConsole, +) +import time_machine + +from sqlmesh.core import dialect as d +from sqlmesh.core.console import get_console +from sqlmesh.core.model import ( + SqlModel, + load_sql_based_model, +) +from sqlmesh.core.plan import SnapshotIntervals +from sqlmesh.core.snapshot import ( + SnapshotChangeCategory, +) +from sqlmesh.utils.date import to_datetime, to_timestamp +from sqlmesh.utils.errors import ( + NoChangesPlanError, +) +from tests.core.integration.utils import ( + add_projection_to_model, +) + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_empty_backfill(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + plan = context.plan_builder("prod", skip_tests=True, empty_backfill=True).build() + assert plan.missing_intervals + assert plan.empty_backfill + assert not plan.requires_backfill + + context.apply(plan) + + for model in context.models.values(): + if model.is_seed or model.kind.is_symbolic: + continue + row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM {model.name}")[0] + assert row_num == 0 + + plan = context.plan_builder("prod", skip_tests=True).build() + assert not plan.requires_backfill + assert not plan.has_changes + assert not plan.missing_intervals + + snapshots = plan.snapshots + for snapshot in snapshots.values(): + if not snapshot.intervals: + continue + assert snapshot.intervals[-1][1] <= to_timestamp("2023-01-08") + + 
+@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_empty_backfill_new_model(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + new_model = load_sql_based_model( + d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind FULL, + cron '0 8 * * *', + start '2023-01-01', + ); + + SELECT 1 AS one; + """ + ) + ) + new_model_name = context.upsert_model(new_model).fqn + + with time_machine.travel("2023-01-09 00:00:00 UTC"): + plan = context.plan_builder("dev", skip_tests=True, empty_backfill=True).build() + assert plan.end == to_datetime("2023-01-09") + assert plan.missing_intervals + assert plan.empty_backfill + assert not plan.requires_backfill + + context.apply(plan) + + for model in context.models.values(): + if model.is_seed or model.kind.is_symbolic: + continue + row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi__dev.new_model")[ + 0 + ] + assert row_num == 0 + + plan = context.plan_builder("prod", skip_tests=True).build() + assert not plan.requires_backfill + assert not plan.missing_intervals + + snapshots = plan.snapshots + for snapshot in snapshots.values(): + if not snapshot.intervals: + continue + elif snapshot.name == new_model_name: + assert snapshot.intervals[-1][1] == to_timestamp("2023-01-09") + else: + assert snapshot.intervals[-1][1] <= to_timestamp("2023-01-08") + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_explain(init_and_plan_context: t.Callable): + old_console = get_console() + set_console(TerminalConsole()) + + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + waiter_revenue_by_day_model = context.get_model("sushi.waiter_revenue_by_day") + waiter_revenue_by_day_model = add_projection_to_model( + t.cast(SqlModel, waiter_revenue_by_day_model) + ) + context.upsert_model(waiter_revenue_by_day_model) + + waiter_revenue_by_day_snapshot = context.get_snapshot(waiter_revenue_by_day_model.name) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters") + + common_kwargs = dict(skip_tests=True, no_prompts=True, explain=True) + + # For now just making sure the plan doesn't error + context.plan("dev", **common_kwargs) + context.plan("dev", **common_kwargs, skip_backfill=True) + context.plan("dev", **common_kwargs, empty_backfill=True) + context.plan("dev", **common_kwargs, forward_only=True, enable_preview=True) + context.plan("prod", **common_kwargs) + context.plan("prod", **common_kwargs, forward_only=True) + context.plan("prod", **common_kwargs, restate_models=[waiter_revenue_by_day_model.name]) + + set_console(old_console) + + # Make sure that the now changes were actually applied + for target_env in ("dev", "prod"): + plan = context.plan_builder(target_env, skip_tests=True).build() + assert plan.has_changes + assert plan.missing_intervals + assert plan.directly_modified == {waiter_revenue_by_day_snapshot.snapshot_id} + assert len(plan.new_snapshots) == 2 + assert {s.snapshot_id for s in plan.new_snapshots} == { + waiter_revenue_by_day_snapshot.snapshot_id, + top_waiters_snapshot.snapshot_id, + } + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_ignore_cron( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context("examples/sushi") + + expressions = d.parse( + f""" + MODEL ( + name memory.sushi.test_allow_partials, + kind INCREMENTAL_UNMANAGED, + allow_partials true, + start '2023-01-01', + ); + + SELECT @end_ts AS end_ts + """ + ) + model = load_sql_based_model(expressions) + + 
context.upsert_model(model) + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + + assert ( + context.engine_adapter.fetchone("SELECT MAX(end_ts) FROM memory.sushi.test_allow_partials")[ + 0 + ] + == "2023-01-07 23:59:59.999999" + ) + + plan_no_ignore_cron = context.plan_builder( + "prod", run=True, ignore_cron=False, skip_tests=True + ).build() + assert not plan_no_ignore_cron.missing_intervals + + plan = context.plan_builder("prod", run=True, ignore_cron=True, skip_tests=True).build() + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot(model, raise_if_missing=True).snapshot_id, + intervals=[ + (to_timestamp("2023-01-08"), to_timestamp("2023-01-08 15:00:00")), + ], + ) + ] + context.apply(plan) + + assert ( + context.engine_adapter.fetchone("SELECT MAX(end_ts) FROM memory.sushi.test_allow_partials")[ + 0 + ] + == "2023-01-08 14:59:59.999999" + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_with_run( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + with time_machine.travel("2023-01-09 00:00:00 UTC"): + plan = context.plan(run=True) + assert plan.has_changes + assert plan.missing_intervals + + context.apply(plan) + + snapshots = context.state_sync.state_sync.get_snapshots(context.snapshots.values()) + assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == { + '"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"), + '"memory"."sushi"."order_items"': to_timestamp("2023-01-09"), + '"memory"."sushi"."orders"': to_timestamp("2023-01-09"), + '"memory"."sushi"."items"': to_timestamp("2023-01-09"), + '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-09"), + '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-09"), + '"memory"."sushi"."latest_order"': to_timestamp("2023-01-09"), + '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"), + '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-09"), + '"memory"."sushi"."marketing"': to_timestamp("2023-01-09"), + '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-09"), + '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-09"), + '"memory"."raw"."demographics"': to_timestamp("2023-01-09"), + "assert_item_price_above_zero": to_timestamp("2023-01-09"), + '"memory"."sushi"."active_customers"': to_timestamp("2023-01-09"), + '"memory"."sushi"."customers"': to_timestamp("2023-01-09"), + '"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-09"), + '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-09"), + } + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_select_models(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Modify 2 models. + model = context.get_model("sushi.waiter_revenue_by_day") + kwargs = { + **model.dict(), + # Make a breaking change. 
+ "query": model.query.order_by("waiter_id"), # type: ignore + } + context.upsert_model(SqlModel.parse_obj(kwargs)) + + model = context.get_model("sushi.customer_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + expected_intervals = [ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ] + + waiter_revenue_by_day_snapshot_id = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ).snapshot_id + + # Select one of the modified models. + plan_builder = context.plan_builder( + "dev", select_models=["*waiter_revenue_by_day"], skip_tests=True + ) + snapshot = plan_builder._context_diff.snapshots[waiter_revenue_by_day_snapshot_id] + plan_builder.set_choice(snapshot, SnapshotChangeCategory.BREAKING) + plan = plan_builder.build() + + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot_id, + intervals=expected_intervals, + ), + ] + + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert len(dev_df) == 7 + + # Make sure that we only create a view for the selected model. + schema_objects = context.engine_adapter.get_data_objects("sushi__dev") + assert len(schema_objects) == 1 + assert schema_objects[0].name == "waiter_revenue_by_day" + + # Validate the other modified model. + assert not context.get_snapshot("sushi.customer_revenue_by_day").change_category + assert not context.get_snapshot("sushi.customer_revenue_by_day").version + + # Validate the downstream model. + assert not context.engine_adapter.table_exists( + context.get_snapshot("sushi.top_waiters").table_name() + ) + assert not context.engine_adapter.table_exists( + context.get_snapshot("sushi.top_waiters").table_name(False) + ) + + # Make sure that tables are created when deploying to prod. 
+ plan = context.plan("prod", skip_tests=True) + context.apply(plan) + assert context.engine_adapter.table_exists( + context.get_snapshot("sushi.top_waiters").table_name() + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_select_models_for_backfill(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + expected_intervals = [ + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ] + + plan = context.plan_builder( + "dev", backfill_models=["+*waiter_revenue_by_day"], skip_tests=True + ).build() + + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot("sushi.items", raise_if_missing=True).snapshot_id, + intervals=expected_intervals, + ), + SnapshotIntervals( + snapshot_id=context.get_snapshot( + "sushi.order_items", raise_if_missing=True + ).snapshot_id, + intervals=expected_intervals, + ), + SnapshotIntervals( + snapshot_id=context.get_snapshot("sushi.orders", raise_if_missing=True).snapshot_id, + intervals=expected_intervals, + ), + SnapshotIntervals( + snapshot_id=context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ).snapshot_id, + intervals=expected_intervals, + ), + ] + + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert len(dev_df) == 1 + + schema_objects = context.engine_adapter.get_data_objects("sushi__dev") + assert {o.name for o in schema_objects} == { + "items", + "order_items", + "orders", + "waiter_revenue_by_day", + } + + assert not context.engine_adapter.table_exists( + context.get_snapshot("sushi.customer_revenue_by_day").table_name() + ) + + # Make sure that tables are created when deploying to prod. + plan = context.plan("prod") + context.apply(plan) + assert context.engine_adapter.table_exists( + context.get_snapshot("sushi.customer_revenue_by_day").table_name() + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_select_unchanged_model_for_backfill(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Modify 2 models. + model = context.get_model("sushi.waiter_revenue_by_day") + kwargs = { + **model.dict(), + # Make a breaking change. + "query": d.parse_one( + f"{model.query.sql(dialect='duckdb')} ORDER BY waiter_id", dialect="duckdb" + ), + } + context.upsert_model(SqlModel.parse_obj(kwargs)) + + model = context.get_model("sushi.customer_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + expected_intervals = [ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ] + + waiter_revenue_by_day_snapshot_id = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ).snapshot_id + + # Select one of the modified models. 
+ plan_builder = context.plan_builder( + "dev", select_models=["*waiter_revenue_by_day"], skip_tests=True + ) + snapshot = plan_builder._context_diff.snapshots[waiter_revenue_by_day_snapshot_id] + plan_builder.set_choice(snapshot, SnapshotChangeCategory.BREAKING) + plan = plan_builder.build() + + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot_id, + intervals=expected_intervals, + ), + ] + + context.apply(plan) + + # Make sure that we only create a view for the selected model. + schema_objects = context.engine_adapter.get_data_objects("sushi__dev") + assert {o.name for o in schema_objects} == {"waiter_revenue_by_day"} + + # Now select a model downstream from the previously modified one in order to backfill it. + plan = context.plan_builder("dev", select_models=["*top_waiters"], skip_tests=True).build() + + assert not plan.has_changes + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot( + "sushi.top_waiters", raise_if_missing=True + ).snapshot_id, + intervals=expected_intervals, + ), + ] + + context.apply(plan) + + # Make sure that a view has been created for the downstream selected model. + schema_objects = context.engine_adapter.get_data_objects("sushi__dev") + assert {o.name for o in schema_objects} == {"waiter_revenue_by_day", "top_waiters"} + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_create_environment_no_changes_with_selector(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + with pytest.raises(NoChangesPlanError): + context.plan_builder("dev").build() + + plan = context.plan_builder("dev", select_models=["*top_waiters"]).build() + assert not plan.missing_intervals + context.apply(plan) + + schema_objects = context.engine_adapter.get_data_objects("sushi__dev") + assert {o.name for o in schema_objects} == {"top_waiters"} diff --git a/tests/core/integration/test_restatement.py b/tests/core/integration/test_restatement.py new file mode 100644 index 0000000000..a00d8d7ab5 --- /dev/null +++ b/tests/core/integration/test_restatement.py @@ -0,0 +1,1882 @@ +from __future__ import annotations + +import typing as t +import pandas as pd # noqa: TID253 +import pytest +from pathlib import Path +from sqlmesh.core.console import ( + MarkdownConsole, + set_console, + get_console, + CaptureTerminalConsole, +) +import time_machine +from sqlglot import exp +import re +from concurrent.futures import ThreadPoolExecutor, TimeoutError +import time +import queue + +from sqlmesh.core import constants as c +from sqlmesh.core.config import ( + Config, + GatewayConfig, + ModelDefaultsConfig, + DuckDBConnectionConfig, +) +from sqlmesh.core.context import Context +from sqlmesh.core.model import ( + IncrementalByTimeRangeKind, + IncrementalUnmanagedKind, + SqlModel, +) +from sqlmesh.core.plan import SnapshotIntervals +from sqlmesh.core.snapshot import ( + Snapshot, + SnapshotId, +) +from sqlmesh.utils.date import to_timestamp +from sqlmesh.utils.errors import ( + ConflictingPlanError, +) +from tests.core.integration.utils import add_projection_to_model + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_restatement_plan_ignores_changes(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + restated_snapshot = context.get_snapshot("sushi.top_waiters") + + # Simulate a change. 
+ model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + plan = context.plan_builder(restate_models=["sushi.top_waiters"]).build() + assert plan.snapshots != context.snapshots + + assert not plan.directly_modified + assert not plan.has_changes + assert not plan.new_snapshots + assert plan.requires_backfill + assert plan.restatements == { + restated_snapshot.snapshot_id: (to_timestamp("2023-01-01"), to_timestamp("2023-01-09")) + } + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=restated_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ) + ] + + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_restatement_plan_across_environments_snapshot_with_shared_version( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context("examples/sushi") + + # Change kind to incremental unmanaged + model = context.get_model("sushi.waiter_revenue_by_day") + previous_kind = model.kind.copy(update={"forward_only": True}) + assert isinstance(previous_kind, IncrementalByTimeRangeKind) + + model = model.copy( + update={ + "kind": IncrementalUnmanagedKind(), + "physical_version": "pinned_version_12345", + "partitioned_by_": [exp.column("event_date")], + } + ) + context.upsert_model(model) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Make some change and deploy it to both dev and prod environments + model = add_projection_to_model(t.cast(SqlModel, model)) + context.upsert_model(model) + context.plan("dev_a", auto_apply=True, no_prompts=True) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Change the kind back to incremental by time range and deploy to prod + model = model.copy(update={"kind": previous_kind}) + context.upsert_model(model) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Restate the model and verify that the interval hasn't been expanded because of the old snapshot + # with the same version + context.plan( + restate_models=["sushi.waiter_revenue_by_day"], + start="2023-01-06", + end="2023-01-08", + auto_apply=True, + no_prompts=True, + ) + + assert ( + context.fetchdf( + "SELECT COUNT(*) AS cnt FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL AND event_date < '2023-01-06'" + )["cnt"][0] + == 0 + ) + plan = context.plan_builder("prod").build() + assert not plan.missing_intervals + + +def test_restatement_plan_hourly_with_downstream_daily_restates_correct_intervals(tmp_path: Path): + model_a = """ + MODEL ( + name test.a, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01 00:00:00', + cron '@hourly' + ); + + select account_id, ts from test.external_table; + """ + + model_b = """ + MODEL ( + name test.b, + kind FULL, + cron '@daily' + ); + + select account_id, ts from test.a; + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + for path, defn in {"a.sql": model_a, "b.sql": model_b}.items(): + with open(models_dir / path, "w") as f: + f.write(defn) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + ctx = Context(paths=[tmp_path], 
config=config) + + engine_adapter = ctx.engine_adapter + engine_adapter.create_schema("test") + + # source data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004], + "ts": [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], + } + ) + columns_to_types = { + "account_id": exp.DataType.build("int"), + "ts": exp.DataType.build("timestamp"), + } + external_table = exp.table_(table="external_table", db="test", quoted=True) + engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) + engine_adapter.insert_append( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # plan + apply + ctx.plan(auto_apply=True, no_prompts=True) + + def _dates_in_table(table_name: str) -> t.List[str]: + return [ + str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") + ] + + # verify initial state + for tbl in ["test.a", "test.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # restate A + engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'") + ctx.plan( + restate_models=["test.a"], + start="2024-01-01 01:00:00", + end="2024-01-01 02:00:00", + auto_apply=True, + no_prompts=True, + ) + + # verify result + for tbl in ["test.a", "test.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], f"Table {tbl} wasn't cleared" + + # Put some data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004], + "ts": [ + "2024-01-01 01:30:00", + "2024-01-01 23:30:00", + "2024-01-02 03:30:00", + "2024-01-03 12:30:00", + ], + } + ) + engine_adapter.replace_query( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # Restate A across a day boundary with the expectation that two day intervals in B are affected + ctx.plan( + restate_models=["test.a"], + start="2024-01-01 02:00:00", + end="2024-01-02 04:00:00", + auto_apply=True, + no_prompts=True, + ) + + for tbl in ["test.a", "test.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", # present already + # "2024-01-01 02:30:00", # removed in last restatement + "2024-01-01 23:30:00", # added in last restatement + "2024-01-02 03:30:00", # added in last restatement + ], f"Table {tbl} wasn't cleared" + + +def test_restatement_plan_respects_disable_restatements(tmp_path: Path): + model_a = """ + MODEL ( + name test.a, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01', + cron '@daily' + ); + + select account_id, ts from test.external_table; + """ + + model_b = """ + MODEL ( + name test.b, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts", + disable_restatement true, + ), + start '2024-01-01', + cron '@daily' + ); + + select account_id, ts from test.a; + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + for path, defn in {"a.sql": model_a, "b.sql": model_b}.items(): + with open(models_dir / path, "w") as f: + f.write(defn) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + ctx = Context(paths=[tmp_path], config=config) + + engine_adapter = ctx.engine_adapter + engine_adapter.create_schema("test") + + # source data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004], + "ts": [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", 
+ ], + } + ) + columns_to_types = { + "account_id": exp.DataType.build("int"), + "ts": exp.DataType.build("timestamp"), + } + external_table = exp.table_(table="external_table", db="test", quoted=True) + engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) + engine_adapter.insert_append( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # plan + apply + ctx.plan(auto_apply=True, no_prompts=True) + + def _dates_in_table(table_name: str) -> t.List[str]: + return [ + str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") + ] + + def get_snapshot_intervals(snapshot_id): + return list(ctx.state_sync.get_snapshots([snapshot_id]).values())[0].intervals + + # verify initial state + for tbl in ["test.a", "test.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # restate A and expect b to be ignored + starting_b_intervals = get_snapshot_intervals(ctx.snapshots['"memory"."test"."b"'].snapshot_id) + engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'") + ctx.plan( + restate_models=["test.a"], + start="2024-01-01", + end="2024-01-02", + auto_apply=True, + no_prompts=True, + ) + + # verify A was changed and not b + assert _dates_in_table("test.a") == [ + "2024-01-01 00:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + assert _dates_in_table("test.b") == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # Verify B intervals were not touched + b_intervals = get_snapshot_intervals(ctx.snapshots['"memory"."test"."b"'].snapshot_id) + assert starting_b_intervals == b_intervals + + +def test_restatement_plan_clears_correct_intervals_across_environments(tmp_path: Path): + model1 = """ + MODEL ( + name test.incremental_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "date" + ), + start '2024-01-01', + cron '@daily' + ); + + select account_id, date from test.external_table; + """ + + model2 = """ + MODEL ( + name test.downstream_of_incremental, + kind FULL + ); + + select account_id, date from test.incremental_model; + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + with open(models_dir / "model1.sql", "w") as f: + f.write(model1) + + with open(models_dir / "model2.sql", "w") as f: + f.write(model2) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + ctx = Context(paths=[tmp_path], config=config) + + engine_adapter = ctx.engine_adapter + engine_adapter.create_schema("test") + + # source data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004, 1005], + "name": ["foo", "bar", "baz", "bing", "bong"], + "date": ["2024-01-01", "2024-01-02", "2024-01-03", "2024-01-04", "2024-01-05"], + } + ) + columns_to_types = { + "account_id": exp.DataType.build("int"), + "name": exp.DataType.build("varchar"), + "date": exp.DataType.build("date"), + } + external_table = exp.table_(table="external_table", db="test", quoted=True) + engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) + engine_adapter.insert_append( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # first, create the prod models + ctx.plan(auto_apply=True, no_prompts=True) + assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,) + assert 
engine_adapter.fetchone("select count(*) from test.downstream_of_incremental") == (5,) + assert not engine_adapter.table_exists("test__dev.incremental_model") + + # then, make a dev version + model1 = """ + MODEL ( + name test.incremental_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "date" + ), + start '2024-01-01', + cron '@daily' + ); + + select 1 as account_id, date from test.external_table; + """ + with open(models_dir / "model1.sql", "w") as f: + f.write(model1) + ctx.load() + + ctx.plan(environment="dev", auto_apply=True, no_prompts=True) + assert engine_adapter.table_exists("test__dev.incremental_model") + assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (5,) + + # drop some source data so when we restate the interval it essentially clears it which is easy to verify + engine_adapter.execute("delete from test.external_table where date = '2024-01-01'") + assert engine_adapter.fetchone("select count(*) from test.external_table") == (4,) + + # now, restate intervals in dev and verify prod is NOT affected + ctx.plan( + environment="dev", + start="2024-01-01", + end="2024-01-02", + restate_models=["test.incremental_model"], + auto_apply=True, + no_prompts=True, + ) + assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,) + assert engine_adapter.fetchone( + "select count(*) from test.incremental_model where date = '2024-01-01'" + ) == (1,) + assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (4,) + assert engine_adapter.fetchone( + "select count(*) from test__dev.incremental_model where date = '2024-01-01'" + ) == (0,) + + # prod still should not be affected by a run because the restatement only happened in dev + ctx.run() + assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,) + assert engine_adapter.fetchone( + "select count(*) from test.incremental_model where date = '2024-01-01'" + ) == (1,) + + # drop another interval from the source data + engine_adapter.execute("delete from test.external_table where date = '2024-01-02'") + + # now, restate intervals in prod and verify that dev IS affected + ctx.plan( + start="2024-01-01", + end="2024-01-03", + restate_models=["test.incremental_model"], + auto_apply=True, + no_prompts=True, + ) + assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (3,) + assert engine_adapter.fetchone( + "select count(*) from test.incremental_model where date = '2024-01-01'" + ) == (0,) + assert engine_adapter.fetchone( + "select count(*) from test.incremental_model where date = '2024-01-02'" + ) == (0,) + assert engine_adapter.fetchone( + "select count(*) from test.incremental_model where date = '2024-01-03'" + ) == (1,) + + # dev not affected yet until `sqlmesh run` is run + assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (4,) + assert engine_adapter.fetchone( + "select count(*) from test__dev.incremental_model where date = '2024-01-01'" + ) == (0,) + assert engine_adapter.fetchone( + "select count(*) from test__dev.incremental_model where date = '2024-01-02'" + ) == (1,) + assert engine_adapter.fetchone( + "select count(*) from test__dev.incremental_model where date = '2024-01-03'" + ) == (1,) + + # the restatement plan for prod should have cleared dev intervals too, which means this `sqlmesh run` re-runs 2024-01-01 and 2024-01-02 + ctx.run(environment="dev") + assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (3,) + 
assert engine_adapter.fetchone(
+        "select count(*) from test__dev.incremental_model where date = '2024-01-01'"
+    ) == (0,)
+    assert engine_adapter.fetchone(
+        "select count(*) from test__dev.incremental_model where date = '2024-01-02'"
+    ) == (0,)
+    assert engine_adapter.fetchone(
+        "select count(*) from test__dev.incremental_model where date = '2024-01-03'"
+    ) == (1,)
+
+    # the downstream full model should always reflect whatever the incremental model is showing
+    assert engine_adapter.fetchone("select count(*) from test.downstream_of_incremental") == (3,)
+    assert engine_adapter.fetchone("select count(*) from test__dev.downstream_of_incremental") == (
+        3,
+    )
+
+
+def test_prod_restatement_plan_clears_correct_intervals_in_derived_dev_tables(tmp_path: Path):
+    """
+    Scenario:
+        I have models A[hourly] <- B[daily] <- C in prod
+        I create dev and add 2 new models D and E so that my dev DAG looks like A <- B <- C <- D[daily] <- E
+        In prod, I restate *one hour* of A
+    Outcome:
+        D and E should be restated in dev despite not being a part of prod
+        since B and D are daily, the whole day should be restated even though only 1hr of the upstream model was restated
+    """
+
+    model_a = """
+    MODEL (
+        name test.a,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column "ts"
+        ),
+        start '2024-01-01 00:00:00',
+        cron '@hourly'
+    );
+
+    select account_id, ts from test.external_table;
+    """
+
+    def _derived_full_model_def(name: str, upstream: str) -> str:
+        return f"""
+        MODEL (
+            name test.{name},
+            kind FULL
+        );
+
+        select account_id, ts from test.{upstream};
+        """
+
+    def _derived_incremental_model_def(name: str, upstream: str) -> str:
+        return f"""
+        MODEL (
+            name test.{name},
+            kind INCREMENTAL_BY_TIME_RANGE (
+                time_column ts
+            ),
+            cron '@daily'
+        );
+
+        select account_id, ts from test.{upstream} where ts between @start_ts and @end_ts;
+        """
+
+    model_b = _derived_incremental_model_def("b", upstream="a")
+    model_c = _derived_full_model_def("c", upstream="b")
+
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+
+    for path, defn in {"a.sql": model_a, "b.sql": model_b, "c.sql": model_c}.items():
+        with open(models_dir / path, "w") as f:
+            f.write(defn)
+
+    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
+    ctx = Context(paths=[tmp_path], config=config)
+
+    engine_adapter = ctx.engine_adapter
+    engine_adapter.create_schema("test")
+
+    # source data
+    df = pd.DataFrame(
+        {
+            "account_id": [1001, 1002, 1003, 1004],
+            "ts": [
+                "2024-01-01 00:30:00",
+                "2024-01-01 01:30:00",
+                "2024-01-01 02:30:00",
+                "2024-01-02 00:30:00",
+            ],
+        }
+    )
+    columns_to_types = {
+        "account_id": exp.DataType.build("int"),
+        "ts": exp.DataType.build("timestamp"),
+    }
+    external_table = exp.table_(table="external_table", db="test", quoted=True)
+    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
+    engine_adapter.insert_append(
+        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
+    )
+
+    # plan + apply A, B, C in prod
+    ctx.plan(auto_apply=True, no_prompts=True)
+
+    # add D[daily], E in dev
+    model_d = _derived_incremental_model_def("d", upstream="c")
+    model_e = _derived_full_model_def("e", upstream="d")
+
+    for path, defn in {
+        "d.sql": model_d,
+        "e.sql": model_e,
+    }.items():
+        with open(models_dir / path, "w") as f:
+            f.write(defn)
+
+    # plan + apply dev
+    ctx.load()
+    ctx.plan(environment="dev", auto_apply=True, no_prompts=True)
+
+    def _dates_in_table(table_name: str) -> t.List[str]:
+        return [
+            str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts")
+        ]
+
+    # verify initial state
+    for tbl in ["test.a", "test.b", "test.c", "test__dev.d", "test__dev.e"]:
+        assert engine_adapter.table_exists(tbl)
+        assert _dates_in_table(tbl) == [
+            "2024-01-01 00:30:00",
+            "2024-01-01 01:30:00",
+            "2024-01-01 02:30:00",
+            "2024-01-02 00:30:00",
+        ]
+
+    for tbl in ["test.d", "test.e"]:
+        assert not engine_adapter.table_exists(tbl)
+
+    # restate A in prod
+    engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'")
+    ctx.plan(
+        restate_models=["test.a"],
+        start="2024-01-01 01:00:00",
+        end="2024-01-01 02:00:00",
+        auto_apply=True,
+        no_prompts=True,
+    )
+
+    # verify result
+    for tbl in ["test.a", "test.b", "test.c"]:
+        assert _dates_in_table(tbl) == [
+            "2024-01-01 00:30:00",
+            "2024-01-01 02:30:00",
+            "2024-01-02 00:30:00",
+        ], f"Table {tbl} wasn't cleared"
+
+    # dev shouldn't have been affected yet
+    for tbl in ["test__dev.d", "test__dev.e"]:
+        assert _dates_in_table(tbl) == [
+            "2024-01-01 00:30:00",
+            "2024-01-01 01:30:00",
+            "2024-01-01 02:30:00",
+            "2024-01-02 00:30:00",
+        ], f"Table {tbl} was prematurely cleared"
+
+    # run dev to trigger the processing of the prod restatement
+    ctx.run(environment="dev")
+
+    # data should now be cleared from dev
+    # note that D is a daily model, so clearing an hour interval from A should have triggered the full day in D
+    for tbl in ["test__dev.d", "test__dev.e"]:
+        assert _dates_in_table(tbl) == [
+            "2024-01-01 00:30:00",
+            "2024-01-01 02:30:00",
+            "2024-01-02 00:30:00",
+        ], f"Table {tbl} wasn't cleared"
+
+
+def test_prod_restatement_plan_clears_unaligned_intervals_in_derived_dev_tables(tmp_path: Path):
+    """
+    Scenario:
+        I have a model A[hourly] in prod
+        I create dev and add a model B[daily]
+        In prod, I restate *one hour* of A
+
+    Outcome:
+        The whole day for B should be restated. The restatement plan for prod has no hints about B's cadence because
+        B only exists in dev and there are no other downstream models in prod that would cause the restatement intervals
+        to be widened.
+
+    Therefore, this test checks that SQLMesh does the right thing when an interval is partially cleared
+    """
+
+    model_a = """
+    MODEL (
+        name test.a,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column "ts"
+        ),
+        start '2024-01-01 00:00:00',
+        cron '@hourly'
+    );
+
+    select account_id, ts from test.external_table;
+    """
+
+    model_b = """
+    MODEL (
+        name test.b,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column ts
+        ),
+        cron '@daily'
+    );
+
+    select account_id, ts from test.a where ts between @start_ts and @end_ts;
+    """
+
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+
+    with open(models_dir / "a.sql", "w") as f:
+        f.write(model_a)
+
+    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
+    ctx = Context(paths=[tmp_path], config=config)
+
+    engine_adapter = ctx.engine_adapter
+    engine_adapter.create_schema("test")
+
+    # source data
+    df = pd.DataFrame(
+        {
+            "account_id": [1001, 1002, 1003, 1004],
+            "ts": [
+                "2024-01-01 00:30:00",
+                "2024-01-01 01:30:00",
+                "2024-01-01 02:30:00",
+                "2024-01-02 00:30:00",
+            ],
+        }
+    )
+    columns_to_types = {
+        "account_id": exp.DataType.build("int"),
+        "ts": exp.DataType.build("timestamp"),
+    }
+    external_table = exp.table_(table="external_table", db="test", quoted=True)
+    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
+    engine_adapter.insert_append(
+        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
+    )
+
+    # plan + apply A[hourly] in prod
+    ctx.plan(auto_apply=True, no_prompts=True)
+
+    # add B[daily] in dev
+    with open(models_dir / "b.sql", "w") as f:
+        f.write(model_b)
+
+    # plan + apply dev
+    ctx.load()
+    ctx.plan(environment="dev", auto_apply=True, no_prompts=True)
+
+    def _dates_in_table(table_name: str) -> t.List[str]:
+        return [
+            str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts")
+        ]
+
+    # verify initial state
+    for tbl in ["test.a", "test__dev.b"]:
+        assert _dates_in_table(tbl) == [
+            "2024-01-01 00:30:00",
+            "2024-01-01 01:30:00",
+            "2024-01-01 02:30:00",
+            "2024-01-02 00:30:00",
+        ]
+
+    # restate A in prod
+    engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'")
+    ctx.plan(
+        restate_models=["test.a"],
+        start="2024-01-01 01:00:00",
+        end="2024-01-01 02:00:00",
+        auto_apply=True,
+        no_prompts=True,
+    )
+
+    # verify result
+    assert _dates_in_table("test.a") == [
+        "2024-01-01 00:30:00",
+        "2024-01-01 02:30:00",
+        "2024-01-02 00:30:00",
+    ]
+
+    # dev shouldn't have been affected yet
+    assert _dates_in_table("test__dev.b") == [
+        "2024-01-01 00:30:00",
+        "2024-01-01 01:30:00",
+        "2024-01-01 02:30:00",
+        "2024-01-02 00:30:00",
+    ]
+
+    # mess with A independently of SQLMesh to prove a whole day gets restated for B instead of just 1hr
+    snapshot_table_name = ctx.table_name("test.a", "dev")
+    engine_adapter.execute(
+        f"delete from {snapshot_table_name} where cast(ts as date) == '2024-01-01'"
+    )
+    engine_adapter.execute(
+        f"insert into {snapshot_table_name} (account_id, ts) values (1007, '2024-01-02 01:30:00')"
+    )
+
+    assert _dates_in_table("test.a") == ["2024-01-02 00:30:00", "2024-01-02 01:30:00"]
+
+    # run dev to trigger the processing of the prod restatement
+    ctx.run(environment="dev")
+
+    # B should now have no data for 2024-01-01
+    # To prove a single day was restated vs the whole model, it also shouldn't have the '2024-01-02 01:30:00' record
+    assert _dates_in_table("test__dev.b") == ["2024-01-02 00:30:00"]
+
+
+def test_prod_restatement_plan_causes_dev_intervals_to_be_processed_in_next_dev_plan(
+    tmp_path: Path,
+):
+    """
+    Scenario:
+        I have a model A[hourly] in prod
+        I create dev and add a model B[daily]
+        In prod, I restate *one hour* of A
+        In dev, I run a normal plan instead of a cadence run
+
+    Outcome:
+        The whole day for B should be restated as part of a normal plan
+    """
+
+    model_a = """
+    MODEL (
+        name test.a,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column "ts"
+        ),
+        start '2024-01-01 00:00:00',
+        cron '@hourly'
+    );
+
+    select account_id, ts from test.external_table;
+    """
+
+    model_b = """
+    MODEL (
+        name test.b,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column ts
+        ),
+        cron '@daily'
+    );
+
+    select account_id, ts from test.a where ts between @start_ts and @end_ts;
+    """
+
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+
+    with open(models_dir / "a.sql", "w") as f:
+        f.write(model_a)
+
+    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
+    ctx = Context(paths=[tmp_path], config=config)
+
+    engine_adapter = ctx.engine_adapter
+    engine_adapter.create_schema("test")
+
+    # source data
+    df = pd.DataFrame(
+        {
+            "account_id": [1001, 1002, 1003, 1004],
+            "ts": [
+                "2024-01-01 00:30:00",
+                "2024-01-01 01:30:00",
+                "2024-01-01 02:30:00",
+                "2024-01-02 00:30:00",
+            ],
+        }
+    )
+    columns_to_types = {
+        "account_id": exp.DataType.build("int"),
+        "ts": exp.DataType.build("timestamp"),
+    }
+    external_table = exp.table_(table="external_table", db="test", quoted=True)
+    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
+    engine_adapter.insert_append(
+        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
+    )
+
+    # plan + apply A[hourly] in prod
+    ctx.plan(auto_apply=True, no_prompts=True)
+
+    # add B[daily] in dev
+    with open(models_dir / "b.sql", "w") as f:
+        f.write(model_b)
+
+    # plan + apply dev
+    ctx.load()
+    ctx.plan(environment="dev", auto_apply=True, no_prompts=True)
+
+    def _dates_in_table(table_name: str) -> t.List[str]:
+        return [
+            str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts")
+        ]
+
+    # verify initial state
+    for tbl in ["test.a", "test__dev.b"]:
+        assert _dates_in_table(tbl) == [
+            "2024-01-01 00:30:00",
+            "2024-01-01 01:30:00",
+            "2024-01-01 02:30:00",
+            "2024-01-02 00:30:00",
+        ]
+
+    # restate A in prod
+    engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'")
+    ctx.plan(
+        restate_models=["test.a"],
+        start="2024-01-01 01:00:00",
+        end="2024-01-01 02:00:00",
+        auto_apply=True,
+        no_prompts=True,
+    )
+
+    # verify result
+    assert _dates_in_table("test.a") == [
+        "2024-01-01 00:30:00",
+        "2024-01-01 02:30:00",
+        "2024-01-02 00:30:00",
+    ]
+
+    # dev shouldn't have been affected yet
+    assert _dates_in_table("test__dev.b") == [
+        "2024-01-01 00:30:00",
+        "2024-01-01 01:30:00",
+        "2024-01-01 02:30:00",
+        "2024-01-02 00:30:00",
+    ]
+
+    # plan dev which should trigger the missing intervals to get repopulated
+    ctx.plan(environment="dev", auto_apply=True, no_prompts=True)
+
+    # dev should have the restated data
+    for tbl in ["test.a", "test__dev.b"]:
+        assert _dates_in_table(tbl) == [
+            "2024-01-01 00:30:00",
+            "2024-01-01 02:30:00",
+            "2024-01-02 00:30:00",
+        ]
+
+
+def test_prod_restatement_plan_causes_dev_intervals_to_be_widened_on_full_restatement_only_model(
+    tmp_path: Path,
+):
+    """
+    Scenario:
+        I have an INCREMENTAL_BY_TIME_RANGE model A[daily] in prod
+        I create dev and add an INCREMENTAL_BY_UNIQUE_KEY model B (which supports full restatement only)
+        In prod, I restate one day of A which should cause intervals in dev to be cleared (but not processed)
+        In dev, I run a plan
+
+    Outcome:
+        In the dev plan, the entire model for B should be rebuilt because it does not support partial restatement
+    """
+
+    model_a = """
+    MODEL (
+        name test.a,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column "ts"
+        ),
+        start '2024-01-01 00:00:00',
+        cron '@daily'
+    );
+
+    select account_id, ts from test.external_table where ts between @start_ts and @end_ts;
+    """
+
+    model_b = """
+    MODEL (
+        name test.b,
+        kind INCREMENTAL_BY_UNIQUE_KEY (
+            unique_key (account_id, ts)
+        ),
+        cron '@daily'
+    );
+
+    select account_id, ts from test.a where ts between @start_ts and @end_ts;
+    """
+
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+
+    with open(models_dir / "a.sql", "w") as f:
+        f.write(model_a)
+
+    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
+    ctx = Context(paths=[tmp_path], config=config)
+
+    engine_adapter = ctx.engine_adapter
+    engine_adapter.create_schema("test")
+
+    # source data
+    df = pd.DataFrame(
+        {
+            "account_id": [1001, 1002, 1003, 1004],
+            "ts": [
+                "2024-01-01 00:30:00",
+                "2024-01-02 01:30:00",
+                "2024-01-03 02:30:00",
+                "2024-01-04 00:30:00",
+            ],
+        }
+    )
+    columns_to_types = {
+        "account_id": exp.DataType.build("int"),
+        "ts": exp.DataType.build("timestamp"),
+    }
+    external_table = exp.table_(table="external_table", db="test", quoted=True)
+    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
+    engine_adapter.insert_append(
+        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
+    )
+
+    # plan + apply A[daily] in prod
+    ctx.plan(auto_apply=True)
+
+    # add B[daily] in dev
+    with open(models_dir / "b.sql", "w") as f:
+        f.write(model_b)
+
+    # plan + apply dev
+    ctx.load()
+    ctx.plan(environment="dev", auto_apply=True)
+
+    def _dates_in_table(table_name: str) -> t.List[str]:
+        return [
+            str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts")
+        ]
+
+    # verify initial state
+    for tbl in ["test.a", "test__dev.b"]:
+        assert _dates_in_table(tbl) == [
+            "2024-01-01 00:30:00",
+            "2024-01-02 01:30:00",
+            "2024-01-03 02:30:00",
+            "2024-01-04 00:30:00",
+        ]
+
+    # restate A in prod
+    engine_adapter.execute("delete from test.external_table where ts = '2024-01-02 01:30:00'")
+    ctx.plan(
+        restate_models=["test.a"],
+        start="2024-01-02 00:00:00",
+        end="2024-01-03 00:00:00",
+        auto_apply=True,
+        no_prompts=True,
+    )
+
+    # verify result
+    assert _dates_in_table("test.a") == [
+        "2024-01-01 00:30:00",
+        "2024-01-03 02:30:00",
+        "2024-01-04 00:30:00",
+    ]
+
+    # dev shouldn't have been affected yet
+    assert _dates_in_table("test__dev.b") == [
+        "2024-01-01 00:30:00",
+        "2024-01-02 01:30:00",
+        "2024-01-03 02:30:00",
+        "2024-01-04 00:30:00",
+    ]
+
+    # plan dev which should trigger the missing intervals to get repopulated
+    ctx.plan(environment="dev", auto_apply=True)
+
+    # dev should have fully refreshed
+    # this is proven by the fact that INCREMENTAL_BY_UNIQUE_KEY can't propagate deletes, so if the
+    # model was not fully rebuilt, the deleted record would still be present
+    for tbl in ["test.a", "test__dev.b"]:
+        assert _dates_in_table(tbl) == [
+            "2024-01-01 00:30:00",
+            "2024-01-03 02:30:00",
+            "2024-01-04 00:30:00",
+        ]
+
+
+def test_prod_restatement_plan_missing_model_in_dev(
+    tmp_path: Path,
+):
+    """
+    Scenario:
+        I have a model B in prod but only model A in dev
+        I restate B in prod
+
+    Outcome:
+        The A model should be ignored and the plan shouldn't fail
+    """
+
+    model_a = """
+    MODEL (
+        name test.a,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column "ts"
+        ),
+        start '2024-01-01 00:00:00',
+        cron '@hourly'
+    );
+
+    select account_id, ts from test.external_table;
+    """
+
+    model_b = """
+    MODEL (
+        name test.b,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column ts
+        ),
+        cron '@daily'
+    );
+
+    select account_id, ts from test.external_table where ts between @start_ts and @end_ts;
+    """
+
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+
+    with open(models_dir / "a.sql", "w") as f:
+        f.write(model_a)
+
+    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
+    ctx = Context(paths=[tmp_path], config=config)
+
+    engine_adapter = ctx.engine_adapter
+    engine_adapter.create_schema("test")
+
+    # source data
+    df = pd.DataFrame(
+        {
+            "account_id": [1001, 1002, 1003, 1004],
+            "ts": [
+                "2024-01-01 00:30:00",
+                "2024-01-01 01:30:00",
+                "2024-01-01 02:30:00",
+                "2024-01-02 00:30:00",
+            ],
+        }
+    )
+    columns_to_types = {
+        "account_id": exp.DataType.build("int"),
+        "ts": exp.DataType.build("timestamp"),
+    }
+    external_table = exp.table_(table="external_table", db="test", quoted=True)
+    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
+    engine_adapter.insert_append(
+        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
+    )
+
+    # plan + apply A[hourly] in dev
+    ctx.plan("dev", auto_apply=True, no_prompts=True)
+
+    # add B[daily] in prod and remove A
+    with open(models_dir / "b.sql", "w") as f:
+        f.write(model_b)
+    Path(models_dir / "a.sql").unlink()
+
+    # plan + apply prod
+    ctx.load()
+    ctx.plan(auto_apply=True, no_prompts=True)
+
+    # restate B in prod
+    ctx.plan(
+        restate_models=["test.b"],
+        start="2024-01-01",
+        end="2024-01-02",
+        auto_apply=True,
+        no_prompts=True,
+    )
+
+
+def test_prod_restatement_plan_includes_related_unpromoted_snapshots(tmp_path: Path):
+    """
+    Scenario:
+        - I have models A <- B in prod
+        - I have models A <- B <- C in dev
+        - Both B and C have gone through a few iterations in dev so multiple snapshot versions exist
+          for them but not all of them are promoted / active
+        - I restate A in prod
+
+    Outcome:
+        - Intervals should be cleared for all of the versions of B and C, regardless
+          of whether they are active in any particular environment, in case they ever get made
+          active
+    """
+
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+
+    (models_dir / "a.sql").write_text("""
+    MODEL (
+        name test.a,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column "ts"
+        ),
+        start '2024-01-01 00:00:00',
+        cron '@daily'
+    );
+
+    select 1 as a, now() as ts;
+    """)
+
+    (models_dir / "b.sql").write_text("""
+    MODEL (
+        name test.b,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column "ts"
+        ),
+        start '2024-01-01 00:00:00',
+        cron '@daily'
+    );
+
+    select a, ts from test.a
+    """)
+
+    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"))
+    ctx = Context(paths=[tmp_path], config=config)
+
+    def _all_snapshots() -> t.Dict[SnapshotId, Snapshot]:
+        all_snapshot_ids = [
+            SnapshotId(name=name, identifier=identifier)
+            for (name, identifier) in ctx.state_sync.state_sync.engine_adapter.fetchall(  # type: ignore
+                "select name, identifier from sqlmesh._snapshots"
+            )
+        ]
+        return ctx.state_sync.get_snapshots(all_snapshot_ids)
+
+    # plan + apply prod
+    ctx.plan(environment="prod", auto_apply=True)
+    assert len(_all_snapshots()) == 2
+
+    # create dev with new version of B
+    (models_dir / "b.sql").write_text("""
+    MODEL (
+        name test.b,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column "ts"
+        ),
+        start '2024-01-01 00:00:00',
+        cron '@daily'
+    );
+
+    select a, ts, 'b dev 1' as change from test.a
+    """)
+
+    ctx.load()
+    ctx.plan(environment="dev", auto_apply=True)
+    assert len(_all_snapshots()) == 3
+
+    # update B (new version) and create C
+    (models_dir / "b.sql").write_text("""
+    MODEL (
+        name test.b,
+        kind INCREMENTAL_BY_TIME_RANGE (
+            time_column "ts"
+        ),
+        start '2024-01-01 00:00:00',
+        cron '@daily'
+    );
+
+    select a, ts, 'b dev 2' as change from test.a
+    """)
+
+    (models_dir / "c.sql").write_text("""
+    MODEL (
+        name test.c,
+        kind FULL,
+        cron '@daily'
+    );
+
+    select *, 'c initial' as val from test.b
+    """)
+
+    ctx.load()
+    ctx.plan(environment="dev", auto_apply=True)
+    assert len(_all_snapshots()) == 5
+
+    # update C (new version), create D (unrelated)
+    (models_dir / "c.sql").write_text("""
+    MODEL (
+        name test.c,
+        kind FULL,
+        cron '@daily'
+    );
+
+    select *, 'c updated' as val from test.b
+    """)
+
+    (models_dir / "d.sql").write_text("""
+    MODEL (
+        name test.d,
+        cron '@daily'
+    );
+
+    select 1 as unrelated
+    """)
+
+    ctx.load()
+    ctx.plan(environment="dev", auto_apply=True)
+    all_snapshots_prior_to_restatement = _all_snapshots()
+    assert len(all_snapshots_prior_to_restatement) == 7
+
+    def _snapshot_instances(lst: t.Dict[SnapshotId, Snapshot], name_match: str) -> t.List[Snapshot]:
+        return [s for s_id, s in lst.items() if name_match in s_id.name]
+
+    # verify initial state
+
+    # 1 instance of A (prod)
+    assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"a"')) == 1
+
+    # 3 instances of B (original in prod + 2 updates in dev)
+    assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"b"')) == 3
+
+    # 2 instances of C (initial + update in dev)
+    assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"c"')) == 2
+
+    # 1 instance of D (initial - dev)
+    assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"d"')) == 1
+
+    # restate A in prod
+    ctx.plan(environment="prod", restate_models=['"memory"."test"."a"'], auto_apply=True)
+
+    all_snapshots_after_restatement = _all_snapshots()
+
+    # All versions of B and C in dev should have had intervals cleared
+    # D in dev should not be touched and A + B in prod should also not be touched
+    a = _snapshot_instances(all_snapshots_after_restatement, '"a"')
+    assert len(a) == 1
+
+    b = _snapshot_instances(all_snapshots_after_restatement, '"b"')
+    # the 1 B instance in prod should be populated and 2 in dev (1 active) should be cleared
+    assert len(b) == 3
+    assert len([s for s in b if not s.intervals]) == 2
+
+    c = _snapshot_instances(all_snapshots_after_restatement, '"c"')
+    # the 2 instances of C in dev (1 active) should be cleared
+    assert len(c) == 2
+    assert len([s for s in c if not s.intervals]) == 2
+
+    d = _snapshot_instances(all_snapshots_after_restatement, '"d"')
+    # D should not be touched since it's in no way downstream of A in prod
+    assert len(d) == 1
+    assert d[0].intervals
+
+
+@time_machine.travel("2023-01-08 15:00:00 UTC")
+def test_dev_restatement_of_prod_model(init_and_plan_context: t.Callable):
+    context, plan = init_and_plan_context("examples/sushi")
+    context.apply(plan)
+
+    model = context.get_model("sushi.waiter_revenue_by_day")
+    context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)))
+
+    context.plan("dev", auto_apply=True, no_prompts=True, skip_tests=True)
+
+    restatement_plan = 
context.plan_builder("dev", restate_models=["*"]).build() + assert set(restatement_plan.restatements) == { + context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id, + context.get_snapshot("sushi.top_waiters").snapshot_id, + } + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_restatement_of_full_model_with_start(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + restatement_plan = context.plan( + restate_models=["sushi.customers"], + start="2023-01-07", + auto_apply=True, + no_prompts=True, + ) + + sushi_customer_interval = restatement_plan.restatements[ + context.get_snapshot("sushi.customers").snapshot_id + ] + assert sushi_customer_interval == (to_timestamp("2023-01-01"), to_timestamp("2023-01-09")) + waiter_by_day_interval = restatement_plan.restatements[ + context.get_snapshot("sushi.waiter_as_customer_by_day").snapshot_id + ] + assert waiter_by_day_interval == (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_restatement_should_not_override_environment_statements(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + context.config.before_all = ["SELECT 'test_before_all';", *context.config.before_all] + context.load() + + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + prod_env_statements = context.state_reader.get_environment_statements(c.PROD) + assert prod_env_statements[0].before_all[0] == "SELECT 'test_before_all';" + + context.plan( + restate_models=["sushi.waiter_revenue_by_day"], + start="2023-01-07", + auto_apply=True, + no_prompts=True, + ) + + prod_env_statements = context.state_reader.get_environment_statements(c.PROD) + assert prod_env_statements[0].before_all[0] == "SELECT 'test_before_all';" + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_restatement_shouldnt_backfill_beyond_prod_intervals(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + model = context.get_model("sushi.top_waiters") + context.upsert_model(SqlModel.parse_obj({**model.dict(), "cron": "@hourly"})) + + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + context.run() + + with time_machine.travel("2023-01-09 02:00:00 UTC"): + # It's time to backfill the waiter_revenue_by_day model but it hasn't run yet + restatement_plan = context.plan( + restate_models=["sushi.waiter_revenue_by_day"], + no_prompts=True, + skip_tests=True, + ) + intervals_by_id = {i.snapshot_id: i for i in restatement_plan.missing_intervals} + # Make sure the intervals don't go beyond the prod intervals + assert intervals_by_id[context.get_snapshot("sushi.top_waiters").snapshot_id].intervals[-1][ + 1 + ] == to_timestamp("2023-01-08 15:00:00 UTC") + assert intervals_by_id[ + context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id + ].intervals[-1][1] == to_timestamp("2023-01-08 00:00:00 UTC") + + +def test_restatement_plan_interval_external_visibility(tmp_path: Path): + """ + Scenario: + - `prod` environment exists, models A <- B + - `dev` environment created, models A <- B(dev) <- C (dev) + - Restatement plan is triggered against `prod` for model A + - During restatement, a new dev environment `dev_2` is created with a new version of B(dev_2) + + Outcome: + - At no point are the prod_intervals considered "missing" from state for A + - The intervals for B(dev) and C(dev) are cleared + - The intervals for B(dev_2) are also cleared 
even though the environment didn't exist at the time the plan was started,
+          because they are based on the data from a partially restated version of A
+    """
+
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+
+    lock_file_path = tmp_path / "test.lock"  # python model blocks while this file is present
+
+    evaluation_lock_file_path = (
+        tmp_path / "evaluation.lock"
+    )  # python model creates this file if it's in the wait loop and deletes it once done
+
+    # Note: to make execution block so we can test stuff, we use a Python model that blocks until it no longer detects the presence of a file
+    (models_dir / "model_a.py").write_text(f"""
+from sqlmesh.core.model import model
+from sqlmesh.core.macros import MacroEvaluator
+
+@model(
+    "test.model_a",
+    is_sql=True,
+    kind="FULL"
+)
+def entrypoint(evaluator: MacroEvaluator) -> str:
+    from pathlib import Path
+    import time
+
+    if evaluator.runtime_stage == 'evaluating':
+        while True:
+            if Path("{str(lock_file_path)}").exists():
+                Path("{str(evaluation_lock_file_path)}").touch()
+                print("lock exists; sleeping")
+                time.sleep(2)
+            else:
+                Path("{str(evaluation_lock_file_path)}").unlink(missing_ok=True)
+                break
+
+    return "select 'model_a' as m"
+""")
+
+    (models_dir / "model_b.sql").write_text("""
+    MODEL (
+        name test.model_b,
+        kind FULL
+    );
+
+    select a.m as m, 'model_b' as mb from test.model_a as a
+    """)
+
+    config = Config(
+        gateways={
+            "": GatewayConfig(
+                connection=DuckDBConnectionConfig(database=str(tmp_path / "db.db")),
+                state_connection=DuckDBConnectionConfig(database=str(tmp_path / "state.db")),
+            )
+        },
+        model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"),
+    )
+    ctx = Context(paths=[tmp_path], config=config)
+
+    ctx.plan(environment="prod", auto_apply=True)
+
+    assert len(ctx.snapshots) == 2
+    assert all(s.intervals for s in ctx.snapshots.values())
+
+    prod_model_a_snapshot_id = ctx.snapshots['"db"."test"."model_a"'].snapshot_id
+    prod_model_b_snapshot_id = ctx.snapshots['"db"."test"."model_b"'].snapshot_id
+
+    # dev models
+    # new version of B
+    (models_dir / "model_b.sql").write_text("""
+    MODEL (
+        name test.model_b,
+        kind FULL
+    );
+
+    select a.m as m, 'model_b' as mb, 'dev' as dev_version from test.model_a as a
+    """)
+
+    # add C
+    (models_dir / "model_c.sql").write_text("""
+    MODEL (
+        name test.model_c,
+        kind FULL
+    );
+
+    select b.*, 'model_c' as mc from test.model_b as b
+    """)
+
+    ctx.load()
+    ctx.plan(environment="dev", auto_apply=True)
+
+    dev_model_b_snapshot_id = ctx.snapshots['"db"."test"."model_b"'].snapshot_id
+    dev_model_c_snapshot_id = ctx.snapshots['"db"."test"."model_c"'].snapshot_id
+
+    assert dev_model_b_snapshot_id != prod_model_b_snapshot_id
+
+    # now, we restate A in prod but touch the lockfile so it hangs during evaluation
+    # we also have to do it in its own thread due to the hang
+    lock_file_path.touch()
+
+    def _run_restatement_plan(tmp_path: Path, config: Config, q: queue.Queue):
+        q.put("thread_started")
+
+        # give this thread its own Context object to prevent segfaulting the Python interpreter
+        restatement_ctx = Context(paths=[tmp_path], config=config)
+
+        # dev2 not present before the restatement plan starts
+        assert restatement_ctx.state_sync.get_environment("dev2") is None
+
+        q.put("plan_started")
+        plan = restatement_ctx.plan(
+            environment="prod", restate_models=['"db"."test"."model_a"'], auto_apply=True
+        )
+        q.put("plan_completed")
+
+        # dev2 was created during the restatement plan
+        assert restatement_ctx.state_sync.get_environment("dev2") is not None
+
+        return plan
+
+    executor = ThreadPoolExecutor()
+    q: queue.Queue = queue.Queue()
+    restatement_plan_future = executor.submit(_run_restatement_plan, tmp_path, config, q)
+    assert q.get() == "thread_started"
+
+    try:
+        if e := restatement_plan_future.exception(timeout=1):
+            # abort early if the plan thread threw an exception
+            raise e
+    except TimeoutError:
+        # that's ok, we don't actually expect the plan to have finished in 1 second
+        pass
+
+    # while that restatement is running, we can simulate another process and check that it sees no empty intervals
+    assert q.get() == "plan_started"
+
+    # don't check for potentially missing intervals until the plan is in the evaluation loop
+    attempts = 0
+    while not evaluation_lock_file_path.exists():
+        time.sleep(2)
+        attempts += 1
+        if attempts > 10:
+            raise ValueError("Gave up waiting for evaluation loop")
+
+    ctx.clear_caches()  # get rid of the file cache so that data is re-fetched from state
+    prod_models_from_state = ctx.state_sync.get_snapshots(
+        snapshot_ids=[prod_model_a_snapshot_id, prod_model_b_snapshot_id]
+    )
+
+    # prod intervals should still be present
+    assert all(m.intervals for m in prod_models_from_state.values())
+
+    # so should dev intervals since prod restatement is still running
+    assert all(m.intervals for m in ctx.snapshots.values())
+
+    # now, let's create a new dev environment "dev2", while the prod restatement plan is still running,
+    # that changes model_b while still being based on the original version of model_a
+    (models_dir / "model_b.sql").write_text("""
+    MODEL (
+        name test.model_b,
+        kind FULL
+    );
+
+    select a.m as m, 'model_b' as mb, 'dev2' as dev_version from test.model_a as a
+    """)
+    ctx.load()
+    ctx.plan(environment="dev2", auto_apply=True)
+
+    dev2_model_b_snapshot_id = ctx.snapshots['"db"."test"."model_b"'].snapshot_id
+    assert dev2_model_b_snapshot_id != dev_model_b_snapshot_id
+    assert dev2_model_b_snapshot_id != prod_model_b_snapshot_id
+
+    # at this point, everything still has intervals
+    ctx.clear_caches()
+    assert all(
+        s.intervals
+        for s in ctx.state_sync.get_snapshots(
+            snapshot_ids=[
+                prod_model_a_snapshot_id,
+                prod_model_b_snapshot_id,
+                dev_model_b_snapshot_id,
+                dev_model_c_snapshot_id,
+                dev2_model_b_snapshot_id,
+            ]
+        ).values()
+    )
+
+    # now, we finally let that restatement plan complete
+    # first, verify it's still blocked where it should be
+    assert not restatement_plan_future.done()
+
+    lock_file_path.unlink()  # remove lock file, plan should be able to proceed now
+
+    if e := restatement_plan_future.exception():  # blocks until future complete
+        raise e
+
+    assert restatement_plan_future.result()
+    assert q.get() == "plan_completed"
+
+    ctx.clear_caches()
+
+    # check that intervals in prod are present
+    assert all(
+        s.intervals
+        for s in ctx.state_sync.get_snapshots(
+            snapshot_ids=[
+                prod_model_a_snapshot_id,
+                prod_model_b_snapshot_id,
+            ]
+        ).values()
+    )
+
+    # check that intervals in dev have been cleared, including the dev2 env that
+    # was created after the restatement plan started
+    assert all(
+        not s.intervals
+        for s in ctx.state_sync.get_snapshots(
+            snapshot_ids=[
+                dev_model_b_snapshot_id,
+                dev_model_c_snapshot_id,
+                dev2_model_b_snapshot_id,
+            ]
+        ).values()
+    )
+
+    executor.shutdown()
+
+
+def test_restatement_plan_detects_prod_deployment_during_restatement(tmp_path: Path):
+    """
+    Scenario:
+        - `prod` environment exists, model A
+        - `dev` environment created, model A(dev)
+        - Restatement plan is triggered against `prod` for model A
+        - During restatement, someone 
else deploys A(dev) to prod, replacing the model that is currently being restated. + + Outcome: + - The deployment plan for dev -> prod should succeed in deploying the new version of A + - The prod restatement plan should fail with a ConflictingPlanError and warn about the model that got updated while undergoing restatement + - The new version of A should have no intervals cleared. The user needs to rerun the restatement if the intervals should still be cleared + """ + orig_console = get_console() + console = CaptureTerminalConsole() + set_console(console) + + models_dir = tmp_path / "models" + models_dir.mkdir() + + lock_file_path = tmp_path / "test.lock" # python model blocks while this file is present + + evaluation_lock_file_path = ( + tmp_path / "evaluation.lock" + ) # python model creates this file if it's in the wait loop and deletes it once done + + # Note: to make execution block so we can test stuff, we use a Python model that blocks until it no longer detects the presence of a file + (models_dir / "model_a.py").write_text(f""" +from sqlmesh.core.model import model +from sqlmesh.core.macros import MacroEvaluator + +@model( + "test.model_a", + is_sql=True, + kind="FULL" +) +def entrypoint(evaluator: MacroEvaluator) -> str: + from pathlib import Path + import time + + if evaluator.runtime_stage == 'evaluating': + while True: + if Path("{str(lock_file_path)}").exists(): + Path("{str(evaluation_lock_file_path)}").touch() + print("lock exists; sleeping") + time.sleep(2) + else: + Path("{str(evaluation_lock_file_path)}").unlink(missing_ok=True) + break + + return "select 'model_a' as m" +""") + + config = Config( + gateways={ + "": GatewayConfig( + connection=DuckDBConnectionConfig(database=str(tmp_path / "db.db")), + state_connection=DuckDBConnectionConfig(database=str(tmp_path / "state.db")), + ) + }, + model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"), + ) + ctx = Context(paths=[tmp_path], config=config) + + # create prod + ctx.plan(environment="prod", auto_apply=True) + original_prod = ctx.state_sync.get_environment("prod") + assert original_prod + + # update model_a for dev + (models_dir / "model_a.py").unlink() + (models_dir / "model_a.sql").write_text(""" + MODEL ( + name test.model_a, + kind FULL + ); + + select 1 as changed + """) + + # create dev + ctx.load() + plan = ctx.plan(environment="dev", auto_apply=True) + assert len(plan.modified_snapshots) == 1 + new_model_a_snapshot_id = list(plan.modified_snapshots)[0] + + # now, trigger a prod restatement plan in a different thread and block it to simulate a long restatement + thread_console = None + + def _run_restatement_plan(tmp_path: Path, config: Config, q: queue.Queue): + nonlocal thread_console + q.put("thread_started") + + # Give this thread its own markdown console to avoid Rich LiveError + thread_console = MarkdownConsole() + set_console(thread_console) + + # give this thread its own Context object to prevent segfaulting the Python interpreter + restatement_ctx = Context(paths=[tmp_path], config=config) + + # ensure dev is present before the restatement plan starts + assert restatement_ctx.state_sync.get_environment("dev") is not None + + q.put("plan_started") + expected_error = None + try: + restatement_ctx.plan( + environment="prod", restate_models=['"db"."test"."model_a"'], auto_apply=True + ) + except ConflictingPlanError as e: + expected_error = e + + q.put("plan_completed") + return expected_error + + executor = ThreadPoolExecutor() + q: queue.Queue = queue.Queue() + lock_file_path.touch() + 
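+    # the restatement runs in a worker thread: model_a's evaluation spins on the lock file
+    # created above, giving this thread a window to deploy dev to prod mid-restatement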
+
+    restatement_plan_future = executor.submit(_run_restatement_plan, tmp_path, config, q)
+    restatement_plan_future.add_done_callback(lambda _: executor.shutdown())
+
+    assert q.get() == "thread_started"
+
+    try:
+        if e := restatement_plan_future.exception(timeout=1):
+            # abort early if the plan thread threw an exception
+            raise e
+    except TimeoutError:
+        # that's ok, we don't actually expect the plan to have finished in 1 second
+        pass
+
+    assert q.get() == "plan_started"
+
+    # ok, now the prod restatement plan is running, let's deploy dev to prod
+    ctx.plan(environment="prod", auto_apply=True)
+
+    new_prod = ctx.state_sync.get_environment("prod")
+    assert new_prod
+    assert new_prod.plan_id != original_prod.plan_id
+    assert new_prod.previous_plan_id == original_prod.plan_id
+
+    # new prod is deployed but restatement plan is still running
+    assert not restatement_plan_future.done()
+
+    # allow restatement plan to complete
+    lock_file_path.unlink()
+
+    plan_error = restatement_plan_future.result()
+    assert isinstance(plan_error, ConflictingPlanError)
+    assert "please re-apply your plan" in repr(plan_error).lower()
+
+    output = " ".join(re.split("\\s+", thread_console.captured_output, flags=re.UNICODE))  # type: ignore
+    assert (
+        f"The following models had new versions deployed while data was being restated: └── test.model_a"
+        in output
+    )
+
+    # check that no intervals have been cleared from the model_a currently in prod
+    model_a = ctx.state_sync.get_snapshots(snapshot_ids=[new_model_a_snapshot_id])[
+        new_model_a_snapshot_id
+    ]
+    assert isinstance(model_a.node, SqlModel)
+    assert model_a.node.render_query_or_raise().sql() == 'SELECT 1 AS "changed"'
+    assert len(model_a.intervals)
+
+    set_console(orig_console)
diff --git a/tests/core/integration/test_run.py b/tests/core/integration/test_run.py
new file mode 100644
index 0000000000..a3b84b5a9e
--- /dev/null
+++ b/tests/core/integration/test_run.py
@@ -0,0 +1,247 @@
+from __future__ import annotations
+
+import typing as t
+import pytest
+import time_machine
+from pytest_mock.plugin import MockerFixture
+
+from sqlmesh.core import constants as c
+from sqlmesh.core import dialect as d
+from sqlmesh.core.config.categorizer import CategorizerConfig
+from sqlmesh.core.model import (
+    SqlModel,
+    PythonModel,
+    load_sql_based_model,
+)
+from sqlmesh.utils.date import to_timestamp
+
+if t.TYPE_CHECKING:
+    pass
+
+pytestmark = pytest.mark.slow
+
+
+@time_machine.travel("2023-01-08 15:00:00 UTC")
+def test_run_with_select_models(
+    init_and_plan_context: t.Callable,
+):
+    context, plan = init_and_plan_context("examples/sushi")
+    context.apply(plan)
+
+    with time_machine.travel("2023-01-09 00:00:00 UTC"):
+        assert context.run(select_models=["*waiter_revenue_by_day"])
+
+    snapshots = context.state_sync.state_sync.get_snapshots(context.snapshots.values())
+    # Only waiter_revenue_by_day and its parents should be backfilled up to 2023-01-09.
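+    # Everything else should remain at the previous high-water mark of 2023-01-08.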
+ assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == { + '"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"), + '"memory"."sushi"."order_items"': to_timestamp("2023-01-09"), + '"memory"."sushi"."orders"': to_timestamp("2023-01-09"), + '"memory"."sushi"."items"': to_timestamp("2023-01-09"), + '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-08"), + '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-08"), + '"memory"."sushi"."latest_order"': to_timestamp("2023-01-08"), + '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"), + '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-08"), + '"memory"."sushi"."marketing"': to_timestamp("2023-01-08"), + '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-08"), + '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-08"), + '"memory"."raw"."demographics"': to_timestamp("2023-01-08"), + "assert_item_price_above_zero": to_timestamp("2023-01-08"), + '"memory"."sushi"."active_customers"': to_timestamp("2023-01-08"), + '"memory"."sushi"."customers"': to_timestamp("2023-01-08"), + '"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-08"), + '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-08"), + } + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_run_with_select_models_no_auto_upstream( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context("examples/sushi") + + model = context.get_model("sushi.waiter_revenue_by_day") + model = SqlModel.parse_obj({**model.dict(), "audits": []}) + context.upsert_model(model) + + context.plan("prod", no_prompts=True, skip_tests=True, auto_apply=True) + + with time_machine.travel("2023-01-09 00:00:00 UTC"): + assert context.run(select_models=["*waiter_revenue_by_day"], no_auto_upstream=True) + + snapshots = context.state_sync.state_sync.get_snapshots(context.snapshots.values()) + # Only waiter_revenue_by_day should be backfilled up to 2023-01-09. 
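+    # With no_auto_upstream=True, even its parents (orders, order_items, items) should stay
+    # at 2023-01-08 instead of being backfilled along with it.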
+    assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == {
+        '"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"),
+        '"memory"."sushi"."order_items"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."orders"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."items"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."latest_order"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."marketing"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-08"),
+        '"memory"."raw"."demographics"': to_timestamp("2023-01-08"),
+        "assert_item_price_above_zero": to_timestamp("2023-01-08"),
+        '"memory"."sushi"."active_customers"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."customers"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-08"),
+        '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-08"),
+    }
+
+
+@time_machine.travel("2023-01-08 15:00:00 UTC")
+def test_run_respects_excluded_transitive_dependencies(init_and_plan_context: t.Callable):
+    context, _ = init_and_plan_context("examples/sushi")
+
+    # Graph: C <- B <- A
+    # B is a transitive dependency linking A and C
+    # Note that the alphabetical ordering of the model names is intentional and helps
+    # surface the problem
+    expressions_c = d.parse(
+        f"""
+        MODEL (
+            name memory.sushi.test_model_c,
+            kind FULL,
+            allow_partials true,
+            cron '@hourly',
+        );
+
+        SELECT @execution_ts AS execution_ts
+        """
+    )
+    model_c = load_sql_based_model(expressions_c)
+    context.upsert_model(model_c)
+
+    # A VIEW model with no partials allowed and a daily cron instead of hourly.
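+    # B links A and C but is deliberately excluded from the selected run below, so its daily
+    # cadence and allow_partials false must not block the hourly models from running.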
+ expressions_b = d.parse( + f""" + MODEL ( + name memory.sushi.test_model_b, + kind VIEW, + allow_partials false, + cron '@daily', + ); + + SELECT * FROM memory.sushi.test_model_c + """ + ) + model_b = load_sql_based_model(expressions_b) + context.upsert_model(model_b) + + expressions_a = d.parse( + f""" + MODEL ( + name memory.sushi.test_model_a, + kind FULL, + allow_partials true, + cron '@hourly', + ); + + SELECT * FROM memory.sushi.test_model_b + """ + ) + model_a = load_sql_based_model(expressions_a) + context.upsert_model(model_a) + + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + assert ( + context.fetchdf("SELECT execution_ts FROM memory.sushi.test_model_c")["execution_ts"].iloc[ + 0 + ] + == "2023-01-08 15:00:00" + ) + + with time_machine.travel("2023-01-08 17:00:00 UTC", tick=False): + context.run( + "prod", + select_models=["*test_model_c", "*test_model_a"], + no_auto_upstream=True, + ignore_cron=True, + ) + assert ( + context.fetchdf("SELECT execution_ts FROM memory.sushi.test_model_a")[ + "execution_ts" + ].iloc[0] + == "2023-01-08 17:00:00" + ) + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_snapshot_triggers(init_and_plan_context: t.Callable, mocker: MockerFixture): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # auto-restatement triggers + orders = context.get_model("sushi.orders") + orders_kind = { + **orders.kind.dict(), + "auto_restatement_cron": "@hourly", + } + orders_kwargs = { + **orders.dict(), + "kind": orders_kind, + } + context.upsert_model(PythonModel.parse_obj(orders_kwargs)) + + order_items = context.get_model("sushi.order_items") + order_items_kind = { + **order_items.kind.dict(), + "auto_restatement_cron": "@hourly", + } + order_items_kwargs = { + **order_items.dict(), + "kind": order_items_kind, + } + context.upsert_model(PythonModel.parse_obj(order_items_kwargs)) + + waiter_revenue_by_day = context.get_model("sushi.waiter_revenue_by_day") + waiter_revenue_by_day_kind = { + **waiter_revenue_by_day.kind.dict(), + "auto_restatement_cron": "@hourly", + } + waiter_revenue_by_day_kwargs = { + **waiter_revenue_by_day.dict(), + "kind": waiter_revenue_by_day_kind, + } + context.upsert_model(SqlModel.parse_obj(waiter_revenue_by_day_kwargs)) + + context.plan(auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full()) + + scheduler = context.scheduler() + + import sqlmesh + + spy = mocker.spy(sqlmesh.core.scheduler.Scheduler, "run_merged_intervals") + + with time_machine.travel("2023-01-09 00:00:01 UTC"): + scheduler.run( + environment=c.PROD, + start="2023-01-01", + auto_restatement_enabled=True, + ) + + assert spy.called + + actual_triggers = spy.call_args.kwargs["auto_restatement_triggers"] + actual_triggers = {k: v for k, v in actual_triggers.items() if v} + assert len(actual_triggers) == 12 + + for id, trigger in actual_triggers.items(): + model_name = id.name.replace('"memory"."sushi".', "").replace('"', "") + auto_restatement_triggers = [ + t.name.replace('"memory"."sushi".', "").replace('"', "") for t in trigger + ] + + if model_name in ("orders", "order_items", "waiter_revenue_by_day"): + assert auto_restatement_triggers == [model_name] + elif model_name in ("customer_revenue_lifetime", "customer_revenue_by_day"): + assert sorted(auto_restatement_triggers) == sorted(["orders", "order_items"]) + elif model_name == "top_waiters": + assert auto_restatement_triggers == ["waiter_revenue_by_day"] + else: + assert auto_restatement_triggers == ["orders"] diff --git 
a/tests/core/integration/utils.py b/tests/core/integration/utils.py new file mode 100644 index 0000000000..bc731e6cc8 --- /dev/null +++ b/tests/core/integration/utils.py @@ -0,0 +1,350 @@ +from __future__ import annotations + +import typing as t +from sqlmesh.core.model.common import ParsableSql +from sqlglot import exp +from sqlglot.expressions import DataType + +from sqlmesh.core import constants as c +from sqlmesh.core.context import Context +from sqlmesh.core.engine_adapter import EngineAdapter +from sqlmesh.core.environment import EnvironmentNamingInfo +from sqlmesh.core.model import ( + IncrementalByTimeRangeKind, + IncrementalByUniqueKeyKind, + ModelKind, + ModelKindName, + SqlModel, + TimeColumn, +) +from sqlmesh.core.model.kind import model_kind_type_from_name +from sqlmesh.core.plan import Plan, PlanBuilder +from sqlmesh.core.snapshot import ( + DeployabilityIndex, + Snapshot, + SnapshotChangeCategory, + SnapshotId, + SnapshotInfoLike, + SnapshotTableInfo, +) +from sqlmesh.utils.date import TimeLike + + +def select_all(table: str, adapter: EngineAdapter) -> t.Iterable: + return adapter.fetchall(f"select * from {table} order by 1") + + +def snapshots_to_versions(snapshots: t.Iterable[Snapshot]) -> t.Dict[str, str]: + return {snapshot.name: snapshot.version or "" for snapshot in snapshots} + + +def to_snapshot_info(snapshot: SnapshotInfoLike) -> SnapshotTableInfo: + return snapshot.table_info + + +def start(context: Context) -> TimeLike: + env = context.state_sync.get_environment("prod") + assert env + return env.start_at + + +def add_projection_to_model(model: SqlModel, literal: bool = True) -> SqlModel: + one_expr = exp.Literal.number(1).as_("one") if literal else exp.column("one") + kwargs = { + **model.dict(), + "query": model.query.select(one_expr), # type: ignore + } + return SqlModel.parse_obj(kwargs) + + +def plan_choice(plan_builder: PlanBuilder, choice: SnapshotChangeCategory) -> None: + for snapshot in plan_builder.build().snapshots.values(): + if not snapshot.version: + plan_builder.set_choice(snapshot, choice) + + +def apply_to_environment( + context: Context, + environment: str, + choice: t.Optional[SnapshotChangeCategory] = None, + plan_validators: t.Optional[t.Iterable[t.Callable]] = None, + apply_validators: t.Optional[t.Iterable[t.Callable]] = None, + plan_start: t.Optional[TimeLike] = None, + allow_destructive_models: t.Optional[t.List[str]] = None, + enable_preview: bool = False, +): + plan_validators = plan_validators or [] + apply_validators = apply_validators or [] + + plan_builder = context.plan_builder( + environment, + start=plan_start or start(context) if environment != c.PROD else None, + forward_only=choice == SnapshotChangeCategory.FORWARD_ONLY, + include_unmodified=True, + allow_destructive_models=allow_destructive_models if allow_destructive_models else [], + enable_preview=enable_preview, + ) + if environment != c.PROD: + plan_builder.set_start(plan_start or start(context)) + + if choice: + if choice == SnapshotChangeCategory.FORWARD_ONLY: + # FORWARD_ONLY is deprecated, fallback to NON_BREAKING to keep the existing tests + choice = SnapshotChangeCategory.NON_BREAKING + plan_choice(plan_builder, choice) + for validator in plan_validators: + validator(context, plan_builder.build()) + + plan = plan_builder.build() + context.apply(plan) + + validate_apply_basics(context, environment, plan.snapshots.values(), plan.deployability_index) + for validator in apply_validators: + validator(context) + return plan + + +def change_data_type( + context: Context, 
model_name: str, old_type: DataType.Type, new_type: DataType.Type +) -> None: + model = context.get_model(model_name) + assert model is not None + + if isinstance(model, SqlModel): + query = model.query.copy() + data_types = query.find_all(DataType) + for data_type in data_types: + if data_type.this == old_type: + data_type.set("this", new_type) + context.upsert_model(model_name, query_=ParsableSql(sql=query.sql(dialect=model.dialect))) + elif model.columns_to_types_ is not None: + for k, v in model.columns_to_types_.items(): + if v.this == old_type: + model.columns_to_types_[k] = DataType.build(new_type) + context.upsert_model(model_name, columns=model.columns_to_types_) + + +def validate_snapshots_in_state_sync(snapshots: t.Iterable[Snapshot], context: Context) -> None: + snapshot_infos = map(to_snapshot_info, snapshots) + state_sync_table_infos = map( + to_snapshot_info, context.state_reader.get_snapshots(snapshots).values() + ) + assert set(snapshot_infos) == set(state_sync_table_infos) + + +def validate_state_sync_environment( + snapshots: t.Iterable[Snapshot], env: str, context: Context +) -> None: + environment = context.state_reader.get_environment(env) + assert environment + snapshot_infos = map(to_snapshot_info, snapshots) + environment_table_infos = map(to_snapshot_info, environment.snapshots) + assert set(snapshot_infos) == set(environment_table_infos) + + +def validate_tables( + snapshots: t.Iterable[Snapshot], + context: Context, + deployability_index: t.Optional[DeployabilityIndex] = None, +) -> None: + adapter = context.engine_adapter + deployability_index = deployability_index or DeployabilityIndex.all_deployable() + for snapshot in snapshots: + is_deployable = deployability_index.is_representative(snapshot) + if not snapshot.is_model or snapshot.is_external: + continue + table_should_exist = not snapshot.is_embedded + assert adapter.table_exists(snapshot.table_name(is_deployable)) == table_should_exist + if table_should_exist: + assert select_all(snapshot.table_name(is_deployable), adapter) + + +def validate_environment_views( + snapshots: t.Iterable[Snapshot], + environment: str, + context: Context, + deployability_index: t.Optional[DeployabilityIndex] = None, +) -> None: + adapter = context.engine_adapter + deployability_index = deployability_index or DeployabilityIndex.all_deployable() + for snapshot in snapshots: + is_deployable = deployability_index.is_representative(snapshot) + if not snapshot.is_model or snapshot.is_symbolic: + continue + view_name = snapshot.qualified_view_name.for_environment( + EnvironmentNamingInfo.from_environment_catalog_mapping( + context.config.environment_catalog_mapping, + name=environment, + suffix_target=context.config.environment_suffix_target, + ) + ) + + assert adapter.table_exists(view_name) + assert select_all(snapshot.table_name(is_deployable), adapter) == select_all( + view_name, adapter + ) + + +def validate_apply_basics( + context: Context, + environment: str, + snapshots: t.Iterable[Snapshot], + deployability_index: t.Optional[DeployabilityIndex] = None, +) -> None: + validate_snapshots_in_state_sync(snapshots, context) + validate_state_sync_environment(snapshots, environment, context) + validate_tables(snapshots, context, deployability_index) + validate_environment_views(snapshots, environment, context, deployability_index) + + +def validate_plan_changes( + plan: Plan, + *, + added: t.Optional[t.Iterable[SnapshotId]] = None, + modified: t.Optional[t.Iterable[str]] = None, + removed: t.Optional[t.Iterable[SnapshotId]] = None, 
+) -> None:
+    added = added or []
+    modified = modified or []
+    removed = removed or []
+    assert set(added) == plan.context_diff.added
+    assert set(modified) == set(plan.context_diff.modified_snapshots)
+    assert set(removed) == set(plan.context_diff.removed_snapshots)
+
+
+def validate_versions_same(
+    model_names: t.List[str],
+    versions: t.Dict[str, str],
+    other_versions: t.Dict[str, str],
+) -> None:
+    for name in model_names:
+        assert versions[name] == other_versions[name]
+
+
+def validate_versions_different(
+    model_names: t.List[str],
+    versions: t.Dict[str, str],
+    other_versions: t.Dict[str, str],
+) -> None:
+    for name in model_names:
+        assert versions[name] != other_versions[name]
+
+
+def validate_query_change(
+    context: Context,
+    environment: str,
+    change_category: SnapshotChangeCategory,
+    logical: bool,
+):
+    versions = snapshots_to_versions(context.snapshots.values())
+
+    change_data_type(
+        context,
+        "sushi.items",
+        DataType.Type.DOUBLE,
+        DataType.Type.FLOAT,
+    )
+
+    directly_modified = ['"memory"."sushi"."items"']
+    indirectly_modified = [
+        '"memory"."sushi"."order_items"',
+        '"memory"."sushi"."waiter_revenue_by_day"',
+        '"memory"."sushi"."customer_revenue_by_day"',
+        '"memory"."sushi"."customer_revenue_lifetime"',
+        '"memory"."sushi"."top_waiters"',
+        "assert_item_price_above_zero",
+    ]
+    not_modified = [
+        snapshot.name
+        for snapshot in context.snapshots.values()
+        if snapshot.name not in directly_modified and snapshot.name not in indirectly_modified
+    ]
+
+    if change_category == SnapshotChangeCategory.BREAKING and not logical:
+        models_same = not_modified
+        models_different = directly_modified + indirectly_modified
+    elif change_category == SnapshotChangeCategory.FORWARD_ONLY:
+        models_same = not_modified + directly_modified + indirectly_modified
+        models_different = []
+    else:
+        models_same = not_modified + indirectly_modified
+        models_different = directly_modified
+
+    def _validate_plan(context, plan):
+        validate_plan_changes(plan, modified=directly_modified + indirectly_modified)
+        assert bool(plan.missing_intervals) != logical
+
+    def _validate_apply(context):
+        current_versions = snapshots_to_versions(context.snapshots.values())
+        validate_versions_same(models_same, versions, current_versions)
+        validate_versions_different(models_different, versions, current_versions)
+
+    apply_to_environment(
+        context,
+        environment,
+        change_category,
+        plan_validators=[_validate_plan],
+        apply_validators=[_validate_apply],
+    )
+
+
+def initial_add(context: Context, environment: str):
+    assert not context.state_reader.get_environment(environment)
+
+    plan = context.plan(environment, start=start(context), create_from="nonexistent_env")
+    validate_plan_changes(plan, added={x.snapshot_id for x in context.snapshots.values()})
+
+    context.apply(plan)
+    validate_apply_basics(context, environment, plan.snapshots.values())
+
+
+def change_model_kind(context: Context, kind: ModelKindName):
+    if kind in (ModelKindName.VIEW, ModelKindName.EMBEDDED, ModelKindName.FULL):
+        context.upsert_model(
+            "sushi.items",
+            partitioned_by=[],
+        )
+    context.upsert_model("sushi.items", kind=model_kind_type_from_name(kind)())  # type: ignore
+
+
+def validate_model_kind_change(
+    kind_name: ModelKindName,
+    context: Context,
+    environment: str,
+    *,
+    logical: bool,
+):
+    directly_modified = ['"memory"."sushi"."items"']
+    indirectly_modified = [
+        '"memory"."sushi"."order_items"',
+        '"memory"."sushi"."waiter_revenue_by_day"',
+        '"memory"."sushi"."customer_revenue_by_day"',
+        '"memory"."sushi"."customer_revenue_lifetime"',
+        '"memory"."sushi"."top_waiters"',
+        "assert_item_price_above_zero",
+    ]
+    if kind_name == ModelKindName.INCREMENTAL_BY_TIME_RANGE:
+        kind: ModelKind = IncrementalByTimeRangeKind(time_column=TimeColumn(column="event_date"))
+    elif kind_name == ModelKindName.INCREMENTAL_BY_UNIQUE_KEY:
+        kind = IncrementalByUniqueKeyKind(unique_key="id")
+    else:
+        kind = model_kind_type_from_name(kind_name)()  # type: ignore
+
+    def _validate_plan(context, plan):
+        validate_plan_changes(plan, modified=directly_modified + indirectly_modified)
+        assert (
+            next(
+                snapshot
+                for snapshot in plan.snapshots.values()
+                if snapshot.name == '"memory"."sushi"."items"'
+            ).model.kind.name
+            == kind.name
+        )
+        assert bool(plan.missing_intervals) != logical
+
+    apply_to_environment(
+        context,
+        environment,
+        SnapshotChangeCategory.NON_BREAKING,
+        plan_validators=[_validate_plan],
+    )
diff --git a/tests/core/test_dialect.py b/tests/core/test_dialect.py
index 52ea673778..02068b1c59 100644
--- a/tests/core/test_dialect.py
+++ b/tests/core/test_dialect.py
@@ -16,6 +16,8 @@ from sqlmesh.core.model import SqlModel, load_sql_based_model
 from sqlmesh.core.config.connection import DIALECT_TO_TYPE
 
+pytestmark = pytest.mark.dialect_isolated
+
 
 def test_format_model_expressions():
     x = format_model_expressions(
diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py
deleted file mode 100644
index bac495a5f1..0000000000
--- a/tests/core/test_integration.py
+++ /dev/null
@@ -1,10887 +0,0 @@
-from __future__ import annotations
-
-import typing as t
-import json
-from collections import Counter
-from datetime import timedelta
-from unittest import mock
-from unittest.mock import patch
-import logging
-from textwrap import dedent
-import os
-import numpy as np  # noqa: TID253
-import pandas as pd  # noqa: TID253
-import pytest
-from pytest import MonkeyPatch
-from pathlib import Path
-from sqlmesh.core.console import (
-    MarkdownConsole,
-    set_console,
-    get_console,
-    TerminalConsole,
-    CaptureTerminalConsole,
-)
-from sqlmesh.core.config.naming import NameInferenceConfig
-from sqlmesh.core.model.common import ParsableSql
-from sqlmesh.utils.concurrency import NodeExecutionFailedError
-import time_machine
-from pytest_mock.plugin import MockerFixture
-from sqlglot import exp
-from sqlglot.expressions import DataType
-import re
-from IPython.utils.capture import capture_output
-from concurrent.futures import ThreadPoolExecutor, TimeoutError
-import time
-import queue
-
-from sqlmesh import CustomMaterialization
-from sqlmesh.cli.project_init import init_example_project
-from sqlmesh.core import constants as c
-from sqlmesh.core import dialect as d
-from sqlmesh.core.config import (
-    AutoCategorizationMode,
-    Config,
-    GatewayConfig,
-    ModelDefaultsConfig,
-    DuckDBConnectionConfig,
-    TableNamingConvention,
-)
-from sqlmesh.core.config.common import EnvironmentSuffixTarget, VirtualEnvironmentMode
-from sqlmesh.core.console import Console, get_console
-from sqlmesh.core.context import Context
-from sqlmesh.core.config.categorizer import CategorizerConfig
-from sqlmesh.core.config.plan import PlanConfig
-from sqlmesh.core.engine_adapter import EngineAdapter, DuckDBEngineAdapter
-from sqlmesh.core.environment import EnvironmentNamingInfo
-from sqlmesh.core.macros import macro
-from sqlmesh.core.model import (
-    FullKind,
-    IncrementalByTimeRangeKind,
-    IncrementalByUniqueKeyKind,
-    IncrementalUnmanagedKind,
-    Model,
-    ModelKind,
-    ModelKindName,
-    SqlModel,
-    PythonModel,
-    ViewKind,
-    CustomKind,
-    TimeColumn,
-    load_sql_based_model,
-)
-from sqlmesh.core.model.kind import model_kind_type_from_name
-from sqlmesh.core.plan import Plan, PlanBuilder, SnapshotIntervals
-from sqlmesh.core.snapshot import (
-    DeployabilityIndex,
-    Snapshot,
-    SnapshotChangeCategory,
-    SnapshotId,
-    SnapshotInfoLike,
-    SnapshotTableInfo,
-)
-from sqlmesh.utils.date import TimeLike, now, to_date, to_datetime, to_timestamp
-from sqlmesh.utils.errors import (
-    NoChangesPlanError,
-    SQLMeshError,
-    PlanError,
-    ConfigError,
-    ConflictingPlanError,
-)
-from sqlmesh.utils.pydantic import validate_string
-from tests.conftest import DuckDBMetadata, SushiDataValidator
-from sqlmesh.utils import CorrelationId
-from tests.utils.test_helpers import use_terminal_console
-from tests.utils.test_filesystem import create_temp_file
-
-if t.TYPE_CHECKING:
-    from sqlmesh import QueryOrDF
-
-pytestmark = pytest.mark.slow
-
-
-@pytest.fixture(autouse=True)
-def mock_choices(mocker: MockerFixture):
-    mocker.patch("sqlmesh.core.console.TerminalConsole._get_snapshot_change_category")
-    mocker.patch("sqlmesh.core.console.TerminalConsole._prompt_backfill")
-
-
-def plan_choice(plan_builder: PlanBuilder, choice: SnapshotChangeCategory) -> None:
-    for snapshot in plan_builder.build().snapshots.values():
-        if not snapshot.version:
-            plan_builder.set_choice(snapshot, choice)
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-@pytest.mark.parametrize(
-    "context_fixture",
-    ["sushi_context", "sushi_no_default_catalog"],
-)
-def test_forward_only_plan_with_effective_date(context_fixture: Context, request):
-    context = request.getfixturevalue(context_fixture)
-    model_name = "sushi.waiter_revenue_by_day"
-    model = context.get_model(model_name)
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)), start="2023-01-01")
-    snapshot = context.get_snapshot(model, raise_if_missing=True)
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan_builder = context.plan_builder("dev", skip_tests=True, forward_only=True)
-    plan = plan_builder.build()
-    assert len(plan.new_snapshots) == 2
-    assert (
-        plan.context_diff.snapshots[snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only
-    assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only
-
-    assert to_timestamp(plan.start) == to_timestamp("2023-01-07")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))],
-        ),
-    ]
-
-    plan = plan_builder.set_effective_from("2023-01-05").build()
-    # Default start should be set to effective_from
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiters_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    plan = plan_builder.set_start("2023-01-06").build()
-    # Start override should take precedence
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiters_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    plan = plan_builder.set_effective_from("2023-01-04").build()
-    # Start should remain unchanged
-    assert plan.start == "2023-01-06"
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiters_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    context.apply(plan)
-
-    dev_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date"
-    )
-    assert dev_df["event_date"].tolist() == [
-        pd.to_datetime("2023-01-06"),
-        pd.to_datetime("2023-01-07"),
-    ]
-
-    prod_plan = context.plan_builder(skip_tests=True).build()
-    # Make sure that the previously set effective_from is respected
-    assert prod_plan.start == to_timestamp("2023-01-04")
-    assert prod_plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiters_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    context.apply(prod_plan)
-
-    prod_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date"
-    )
-    assert prod_df["event_date"].tolist() == [
-        pd.to_datetime(x) for x in ["2023-01-04", "2023-01-05", "2023-01-06", "2023-01-07"]
-    ]
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_forward_only_model_regular_plan(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    model_name = "sushi.waiter_revenue_by_day"
-
-    model = context.get_model(model_name)
-    model = add_projection_to_model(t.cast(SqlModel, model))
-    forward_only_kind = model.kind.copy(update={"forward_only": True})
-    model = model.copy(update={"kind": forward_only_kind})
-
-    context.upsert_model(model)
-    snapshot = context.get_snapshot(model, raise_if_missing=True)
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build()
-    assert len(plan.new_snapshots) == 2
-    assert (
-        plan.context_diff.snapshots[snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only
-    assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only
-
-    assert plan.start == to_datetime("2023-01-01")
-    assert not plan.missing_intervals
-
-    context.apply(plan)
-
-    dev_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date"
-    )
-    assert not dev_df["event_date"].tolist()
-
-    # Run a restatement plan to preview changes
-    plan_builder = context.plan_builder(
-        "dev", skip_tests=True, restate_models=[model_name], enable_preview=False
-    )
-    plan_builder.set_start("2023-01-06")
-    assert plan_builder.build().missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiters_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    # Make sure that changed start is reflected in missing intervals
-    plan_builder.set_start("2023-01-07")
-    assert plan_builder.build().missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiters_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    context.apply(plan_builder.build())
-
-    dev_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date"
-    )
-    assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")]
-
-    # Promote changes to prod
-    prod_plan = context.plan_builder(skip_tests=True).build()
-    assert not prod_plan.missing_intervals
-
-    context.apply(prod_plan)
-
-    # The change was applied in a forward-only manner so no values in the new column should be populated
-    prod_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date"
-    )
-    assert not prod_df["event_date"].tolist()
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_forward_only_model_regular_plan_preview_enabled(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    model_name = "sushi.waiter_revenue_by_day"
-
-    model = context.get_model(model_name)
-    model = add_projection_to_model(t.cast(SqlModel, model))
-    forward_only_kind = model.kind.copy(update={"forward_only": True})
-    model = model.copy(update={"kind": forward_only_kind})
-
-    context.upsert_model(model)
-    snapshot = context.get_snapshot(model, raise_if_missing=True)
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build()
-    assert len(plan.new_snapshots) == 2
-    assert (
-        plan.context_diff.snapshots[snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only
-    assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only
-
-    assert to_timestamp(plan.start) == to_timestamp("2023-01-07")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    context.apply(plan)
-
-    dev_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date"
-    )
-    assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")]
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_forward_only_model_restate_full_history_in_dev(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    model_name = "memory.sushi.customer_max_revenue"
-    expressions = d.parse(
-        f"""
-        MODEL (
-            name {model_name},
-            kind INCREMENTAL_BY_UNIQUE_KEY (
-                unique_key customer_id,
-                forward_only true,
-            ),
-        );
-
-        SELECT
-            customer_id, MAX(revenue) AS max_revenue
-        FROM memory.sushi.customer_revenue_lifetime
-        GROUP BY 1;
-        """
-    )
-
-    model = load_sql_based_model(expressions)
-    assert model.forward_only
-    assert model.kind.full_history_restatement_only
-    context.upsert_model(model)
-
-    context.plan("prod", skip_tests=True, auto_apply=True, enable_preview=False)
-
-    model_kwargs = {
-        **model.dict(),
-        # Make a breaking change.
-        "query": model.query.order_by("customer_id"),  # type: ignore
-    }
-    context.upsert_model(SqlModel.parse_obj(model_kwargs))
-
-    # Apply the model change in dev
-    plan = context.plan_builder(
-        "dev",
-        skip_tests=True,
-        enable_preview=False,
-        categorizer_config=CategorizerConfig.all_full(),
-    ).build()
-    assert not plan.missing_intervals
-    context.apply(plan)
-
-    snapshot = context.get_snapshot(model, raise_if_missing=True)
-    snapshot_table_name = snapshot.table_name(False)
-
-    # Manually insert a dummy value to check that the table is recreated during the restatement
-    context.engine_adapter.insert_append(
-        snapshot_table_name,
-        pd.DataFrame({"customer_id": [-1], "max_revenue": [100]}),
-    )
-    df = context.engine_adapter.fetchdf(
-        "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue WHERE customer_id = -1"
-    )
-    assert df["cnt"][0] == 1
-
-    # Apply a restatement plan in dev
-    plan = context.plan("dev", restate_models=[model.name], auto_apply=True, enable_preview=False)
-    assert len(plan.missing_intervals) == 1
-
-    # Check that the dummy value is not present
-    df = context.engine_adapter.fetchdf(
-        "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue WHERE customer_id = -1"
-    )
-    assert df["cnt"][0] == 0
-
-    # Check that the table is not empty
-    df = context.engine_adapter.fetchdf(
-        "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue"
-    )
-    assert df["cnt"][0] > 0
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_full_history_restatement_model_regular_plan_preview_enabled(
-    init_and_plan_context: t.Callable,
-):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    model_name = "sushi.marketing"  # SCD2 model
-
-    model = context.get_model(model_name)
-    model = add_projection_to_model(t.cast(SqlModel, model))
-
-    context.upsert_model(model)
-    snapshot = context.get_snapshot(model, raise_if_missing=True)
-    customers_snapshot = context.get_snapshot("sushi.customers", raise_if_missing=True)
-    active_customers_snapshot = context.get_snapshot(
-        "sushi.active_customers", raise_if_missing=True
-    )
-    waiter_as_customer_snapshot = context.get_snapshot(
-        "sushi.waiter_as_customer_by_day", raise_if_missing=True
-    )
-
-    plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build()
-
-    assert len(plan.new_snapshots) == 6
-    assert (
-        plan.context_diff.snapshots[snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[customers_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[active_customers_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[waiter_as_customer_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert all(s.is_forward_only for s in plan.new_snapshots)
-
-    assert to_timestamp(plan.start) == to_timestamp("2023-01-07")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    context.apply(plan)
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_metadata_changed_regular_plan_preview_enabled(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    model_name = "sushi.waiter_revenue_by_day"
-
-    model = context.get_model(model_name)
-    model = model.copy(update={"owner": "new_owner"})
-
-    context.upsert_model(model)
-    snapshot = context.get_snapshot(model, raise_if_missing=True)
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build()
-    assert len(plan.new_snapshots) == 2
-    assert (
-        plan.context_diff.snapshots[snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.METADATA
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.METADATA
-    )
-    assert not plan.missing_intervals
-    assert not plan.restatements
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_hourly_model_with_lookback_no_backfill_in_dev(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-
-    model_name = "sushi.waiter_revenue_by_day"
-
-    model = context.get_model(model_name)
-    model = SqlModel.parse_obj(
-        {
-            **model.dict(),
-            "kind": model.kind.copy(update={"lookback": 1}),
-            "cron": "@hourly",
-            "audits": [],
-        }
-    )
-    context.upsert_model(model)
-
-    plan = context.plan_builder("prod", skip_tests=True).build()
-    context.apply(plan)
-
-    top_waiters_model = context.get_model("sushi.top_waiters")
-    top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=True)
-    context.upsert_model(top_waiters_model)
-
-    context.get_snapshot(model, raise_if_missing=True)
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    with time_machine.travel(now() + timedelta(hours=2)):
-        plan = context.plan_builder("dev", skip_tests=True).build()
-        # Make sure the waiter_revenue_by_day model is not backfilled.
-        assert plan.missing_intervals == [
-            SnapshotIntervals(
-                snapshot_id=top_waiters_snapshot.snapshot_id,
-                intervals=[
-                    (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                    (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                    (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                    (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                    (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                    (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                    (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-                ],
-            ),
-        ]
-
-
-@time_machine.travel("2023-01-08 00:00:00 UTC", tick=False)
-def test_parent_cron_after_child(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-
-    model = context.get_model("sushi.waiter_revenue_by_day")
-    model = SqlModel.parse_obj(
-        {
-            **model.dict(),
-            "cron": "50 23 * * *",
-        }
-    )
-    context.upsert_model(model)
-
-    plan = context.plan_builder("prod", skip_tests=True).build()
-    context.apply(plan)
-
-    waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True)
-    assert waiter_revenue_by_day_snapshot.intervals == [
-        (to_timestamp("2023-01-01"), to_timestamp("2023-01-07"))
-    ]
-
-    top_waiters_model = context.get_model("sushi.top_waiters")
-    top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=True)
-    context.upsert_model(top_waiters_model)
-
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    with time_machine.travel("2023-01-08 23:55:00 UTC"):  # Past parent's cron, but before child's
-        plan = context.plan_builder("dev", skip_tests=True).build()
-        # Make sure the waiter_revenue_by_day model is not backfilled.
-        assert plan.missing_intervals == [
-            SnapshotIntervals(
-                snapshot_id=top_waiters_snapshot.snapshot_id,
-                intervals=[
-                    (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                    (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                    (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                    (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                    (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                    (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                    (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-                ],
-            ),
-        ]
-
-
-@time_machine.travel("2023-01-08 00:00:00 UTC")
-@pytest.mark.parametrize(
-    "forward_only, expected_intervals",
-    [
-        (
-            False,
-            [
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-            ],
-        ),
-        (
-            True,
-            [
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-            ],
-        ),
-    ],
-)
-def test_cron_not_aligned_with_day_boundary(
-    init_and_plan_context: t.Callable,
-    forward_only: bool,
-    expected_intervals: t.List[t.Tuple[int, int]],
-):
-    context, plan = init_and_plan_context("examples/sushi")
-
-    model = context.get_model("sushi.waiter_revenue_by_day")
-    model = SqlModel.parse_obj(
-        {
-            **model.dict(),
-            "kind": model.kind.copy(update={"forward_only": forward_only}),
-            "cron": "0 12 * * *",
-        }
-    )
-    context.upsert_model(model)
-
-    plan = context.plan_builder("prod", skip_tests=True).build()
-    context.apply(plan)
-
-    waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True)
-    assert waiter_revenue_by_day_snapshot.intervals == [
-        (to_timestamp("2023-01-01"), to_timestamp("2023-01-07"))
-    ]
-
-    model = add_projection_to_model(t.cast(SqlModel, model), literal=True)
-    context.upsert_model(model)
-
-    waiter_revenue_by_day_snapshot = context.get_snapshot(
-        "sushi.waiter_revenue_by_day", raise_if_missing=True
-    )
-
-    with time_machine.travel("2023-01-08 00:10:00 UTC"):  # Past model's cron.
-        plan = context.plan_builder(
-            "dev", select_models=[model.name], skip_tests=True, enable_preview=True
-        ).build()
-        assert plan.missing_intervals == [
-            SnapshotIntervals(
-                snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id,
-                intervals=expected_intervals,
-            ),
-        ]
-
-
-@time_machine.travel("2023-01-08 00:00:00 UTC")
-def test_cron_not_aligned_with_day_boundary_new_model(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    existing_model = context.get_model("sushi.waiter_revenue_by_day")
-    existing_model = SqlModel.parse_obj(
-        {
-            **existing_model.dict(),
-            "kind": existing_model.kind.copy(update={"forward_only": True}),
-        }
-    )
-    context.upsert_model(existing_model)
-
-    plan = context.plan_builder("prod", skip_tests=True).build()
-    context.apply(plan)
-
-    # Add a new model and make a change to a forward-only model.
-    # The cron of the new model is not aligned with the day boundary.
-    new_model = load_sql_based_model(
-        d.parse(
-            """
-            MODEL (
-                name memory.sushi.new_model,
-                kind FULL,
-                cron '0 8 * * *',
-                start '2023-01-01',
-            );
-
-            SELECT 1 AS one;
-            """
-        )
-    )
-    context.upsert_model(new_model)
-
-    existing_model = add_projection_to_model(t.cast(SqlModel, existing_model), literal=True)
-    context.upsert_model(existing_model)
-
-    plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build()
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=context.get_snapshot(
-                "memory.sushi.new_model", raise_if_missing=True
-            ).snapshot_id,
-            intervals=[(to_timestamp("2023-01-06"), to_timestamp("2023-01-07"))],
-        ),
-        SnapshotIntervals(
-            snapshot_id=context.get_snapshot(
-                "sushi.waiter_revenue_by_day", raise_if_missing=True
-            ).snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-
-@time_machine.travel("2023-01-08 00:00:00 UTC")
-def test_forward_only_preview_child_that_runs_before_parent(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    # This model runs at minute 30 of every hour
-    upstream_model = load_sql_based_model(
-        d.parse(
-            """
-            MODEL (
-                name memory.sushi.upstream_model,
-                kind FULL,
-                cron '30 * * * *',
-                start '2023-01-01',
-            );
-
-            SELECT 1 AS a;
-            """
-        )
-    )
-    context.upsert_model(upstream_model)
-
-    # This model runs at minute 0 of every hour, so it runs before the upstream model
-    downstream_model = load_sql_based_model(
-        d.parse(
-            """
-            MODEL (
-                name memory.sushi.downstream_model,
-                kind INCREMENTAL_BY_TIME_RANGE(
-                    time_column event_date,
-                    forward_only True,
-                ),
-                cron '0 * * * *',
-                start '2023-01-01',
-            );
-
-            SELECT a, '2023-01-06' AS event_date FROM memory.sushi.upstream_model;
-            """
-        )
-    )
-    context.upsert_model(downstream_model)
-
-    context.plan("prod", skip_tests=True, auto_apply=True)
-
-    with time_machine.travel("2023-01-08 00:05:00 UTC"):
-        # The downstream model runs but not the upstream model
-        context.run("prod")
-
-    # Now it's time for the upstream model to run but it hasn't run yet
-    with time_machine.travel("2023-01-08 00:35:00 UTC"):
-        # Make a change to the downstream model.
-        downstream_model = add_projection_to_model(t.cast(SqlModel, downstream_model), literal=True)
-        context.upsert_model(downstream_model)
-
-        # The plan should only backfill the downstream model despite upstream missing intervals
-        plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build()
-        assert plan.missing_intervals == [
-            SnapshotIntervals(
-                snapshot_id=context.get_snapshot(
-                    downstream_model.name, raise_if_missing=True
-                ).snapshot_id,
-                intervals=[
-                    (to_timestamp("2023-01-07 23:00:00"), to_timestamp("2023-01-08 00:00:00"))
-                ],
-            ),
-        ]
-
-
-@time_machine.travel("2023-01-08 00:00:00 UTC")
-def test_forward_only_monthly_model(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    model = context.get_model("sushi.waiter_revenue_by_day")
-    model = SqlModel.parse_obj(
-        {
-            **model.dict(),
-            "kind": model.kind.copy(update={"forward_only": True}),
-            "cron": "0 0 1 * *",
-            "start": "2022-01-01",
-            "audits": [],
-        }
-    )
-    context.upsert_model(model)
-
-    plan = context.plan_builder("prod", skip_tests=True).build()
-    context.apply(plan)
-
-    waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True)
-    assert waiter_revenue_by_day_snapshot.intervals == [
-        (to_timestamp("2022-01-01"), to_timestamp("2023-01-01"))
-    ]
-
-    model = add_projection_to_model(t.cast(SqlModel, model), literal=True)
-    context.upsert_model(model)
-
-    waiter_revenue_by_day_snapshot = context.get_snapshot(
-        "sushi.waiter_revenue_by_day", raise_if_missing=True
-    )
-
-    plan = context.plan_builder(
-        "dev", select_models=[model.name], skip_tests=True, enable_preview=True
-    ).build()
-    assert to_timestamp(plan.start) == to_timestamp("2022-12-01")
-    assert to_timestamp(plan.end) == to_timestamp("2023-01-08")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id,
-            intervals=[(to_timestamp("2022-12-01"), to_timestamp("2023-01-01"))],
-        ),
-    ]
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_forward_only_parent_created_in_dev_child_created_in_prod(
-    init_and_plan_context: t.Callable,
-):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    waiter_revenue_by_day_model = context.get_model("sushi.waiter_revenue_by_day")
-    waiter_revenue_by_day_model = add_projection_to_model(
-        t.cast(SqlModel, waiter_revenue_by_day_model)
-    )
-    forward_only_kind = waiter_revenue_by_day_model.kind.copy(update={"forward_only": True})
-    waiter_revenue_by_day_model = waiter_revenue_by_day_model.copy(
-        update={"kind": forward_only_kind}
-    )
-    context.upsert_model(waiter_revenue_by_day_model)
-
-    waiter_revenue_by_day_snapshot = context.get_snapshot(
-        waiter_revenue_by_day_model, raise_if_missing=True
-    )
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build()
-    assert len(plan.new_snapshots) == 2
-    assert (
-        plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert all(s.is_forward_only for s in plan.new_snapshots)
-    assert plan.start == to_datetime("2023-01-01")
-    assert not plan.missing_intervals
-
-    context.apply(plan)
-
-    # Update the child to refer to a newly added column.
-    top_waiters_model = context.get_model("sushi.top_waiters")
-    top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=False)
-    context.upsert_model(top_waiters_model)
-
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan = context.plan_builder("prod", skip_tests=True, enable_preview=False).build()
-    assert len(plan.new_snapshots) == 1
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-
-    context.apply(plan)
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_forward_only_view_migration(
-    init_and_plan_context: t.Callable,
-):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    model = context.get_model("sushi.top_waiters")
-    assert model.kind.is_view
-    model = add_projection_to_model(t.cast(SqlModel, model))
-    context.upsert_model(model)
-
-    # Apply a forward-only plan
-    context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True, forward_only=True)
-
-    # Make sure that the new column got reflected in the view schema
-    df = context.fetchdf("SELECT one FROM sushi.top_waiters LIMIT 1")
-    assert len(df) == 1
-
-
-@time_machine.travel("2023-01-08 00:00:00 UTC")
-def test_new_forward_only_model(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    context.plan("dev", skip_tests=True, no_prompts=True, auto_apply=True, enable_preview=False)
-
-    snapshot = context.get_snapshot("sushi.marketing")
-
-    # The deployable table should not exist yet
-    assert not context.engine_adapter.table_exists(snapshot.table_name())
-    assert context.engine_adapter.table_exists(snapshot.table_name(is_deployable=False))
-
-    context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True)
-
-    assert context.engine_adapter.table_exists(snapshot.table_name())
-    assert context.engine_adapter.table_exists(snapshot.table_name(is_deployable=False))
-
-
-@time_machine.travel("2023-01-08 00:00:00 UTC")
-def test_annotated_self_referential_model(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    # Projections are fully annotated in the query but columns were not specified explicitly
-    expressions = d.parse(
-        f"""
-        MODEL (
-            name memory.sushi.test_self_ref,
-            kind FULL,
-            start '2023-01-01',
-        );
-
-        SELECT 1::INT AS one FROM memory.sushi.test_self_ref;
-        """
-    )
-    model = load_sql_based_model(expressions)
-    assert model.depends_on_self
-    context.upsert_model(model)
-
-    context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True)
-
-    df = context.fetchdf("SELECT one FROM memory.sushi.test_self_ref")
-    assert len(df) == 0
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_plan_set_choice_is_reflected_in_missing_intervals(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-    context.upsert_model(context.get_model("sushi.top_waiters").copy(update={"kind": FullKind()}))
-    context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True)
-
-    model_name = "sushi.waiter_revenue_by_day"
-
-    model = context.get_model(model_name)
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)))
-    snapshot = context.get_snapshot(model, raise_if_missing=True)
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan_builder = context.plan_builder("dev", skip_tests=True)
-    plan = plan_builder.build()
-    assert len(plan.new_snapshots) == 2
-    assert (
-        plan.context_diff.snapshots[snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert plan.start == to_timestamp("2023-01-01")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    # Change the category to BREAKING
-    plan = plan_builder.set_choice(
-        plan.context_diff.snapshots[snapshot.snapshot_id], SnapshotChangeCategory.BREAKING
-    ).build()
-    assert (
-        plan.context_diff.snapshots[snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_BREAKING
-    )
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiters_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    # Change the category back to NON_BREAKING
-    plan = plan_builder.set_choice(
-        plan.context_diff.snapshots[snapshot.snapshot_id], SnapshotChangeCategory.NON_BREAKING
-    ).build()
-    assert (
-        plan.context_diff.snapshots[snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    context.apply(plan)
-
-    dev_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date"
-    )
-    assert dev_df["event_date"].tolist() == [
-        pd.to_datetime(x)
-        for x in [
-            "2023-01-01",
-            "2023-01-02",
-            "2023-01-03",
-            "2023-01-04",
-            "2023-01-05",
-            "2023-01-06",
-            "2023-01-07",
-        ]
-    ]
-
-    # Promote changes to prod
-    prod_plan = context.plan_builder(skip_tests=True).build()
-    assert not prod_plan.missing_intervals
-
-    context.apply(prod_plan)
-    prod_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date"
-    )
-    assert prod_df["event_date"].tolist() == [
-        pd.to_datetime(x)
-        for x in [
-            "2023-01-01",
-            "2023-01-02",
-            "2023-01-03",
-            "2023-01-04",
-            "2023-01-05",
-            "2023-01-06",
-            "2023-01-07",
-        ]
-    ]
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC", tick=True)
-@pytest.mark.parametrize("has_view_binding", [False, True])
-def test_non_breaking_change_after_forward_only_in_dev(
-    init_and_plan_context: t.Callable, has_view_binding: bool
-):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.snapshot_evaluator.adapter.HAS_VIEW_BINDING = has_view_binding
-    context.apply(plan)
-
-    model = context.get_model("sushi.waiter_revenue_by_day")
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)))
-    waiter_revenue_by_day_snapshot = context.get_snapshot(
-        "sushi.waiter_revenue_by_day", raise_if_missing=True
-    )
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan = context.plan_builder("dev", skip_tests=True, forward_only=True).build()
-    assert len(plan.new_snapshots) == 2
-    assert (
-        plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert all(s.is_forward_only for s in plan.new_snapshots)
-    assert to_timestamp(plan.start) == to_timestamp("2023-01-07")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id,
-            intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))],
-        ),
-    ]
-
-    # Apply the forward-only changes first.
-    context.apply(plan)
-
-    dev_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date"
-    )
-    assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")]
-
-    # Make a non-breaking change to a model downstream.
-    model = context.get_model("sushi.top_waiters")
-    # Select 'one' column from the updated upstream model.
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, model), literal=False))
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan = context.plan_builder("dev", skip_tests=True).build()
-    assert len(plan.new_snapshots) == 1
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert to_timestamp(plan.start) == to_timestamp("2023-01-01")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiters_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    # Apply the non-breaking changes.
-    context.apply(plan)
-
-    dev_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT waiter_id FROM sushi__dev.top_waiters WHERE one IS NOT NULL"
-    )
-    assert not dev_df.empty
-
-    prod_df = context.engine_adapter.fetchdf("DESCRIBE sushi.top_waiters")
-    assert "one" not in prod_df["column_name"].tolist()
-
-    # Deploy both changes to prod.
-    plan = context.plan_builder("prod", skip_tests=True).build()
-    assert plan.start == to_timestamp("2023-01-01")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiters_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    context.apply(plan)
-
-    prod_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date"
-    )
-    assert prod_df.empty
-
-    prod_df = context.engine_adapter.fetchdf(
-        "SELECT DISTINCT waiter_id FROM sushi.top_waiters WHERE one IS NOT NULL"
-    )
-    assert prod_df.empty
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_indirect_non_breaking_change_after_forward_only_in_dev(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-    # Make sure that the most downstream model is a materialized model.
-    model = context.get_model("sushi.top_waiters")
-    model = model.copy(update={"kind": FullKind()})
-    context.upsert_model(model)
-    context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True)
-
-    # Make sushi.orders a forward-only model.
-    model = context.get_model("sushi.orders")
-    updated_model_kind = model.kind.copy(update={"forward_only": True})
-    model = model.copy(update={"stamp": "force new version", "kind": updated_model_kind})
-    context.upsert_model(model)
-    snapshot = context.get_snapshot(model, raise_if_missing=True)
-
-    plan = context.plan_builder(
-        "dev",
-        skip_tests=True,
-        enable_preview=False,
-        categorizer_config=CategorizerConfig.all_full(),
-    ).build()
-    assert (
-        plan.context_diff.snapshots[snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.BREAKING
-    )
-    assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only
-    assert not plan.requires_backfill
-    context.apply(plan)
-
-    # Make a non-breaking change to a model.
-    model = context.get_model("sushi.top_waiters")
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)))
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build()
-    assert len(plan.new_snapshots) == 1
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert plan.start == to_timestamp("2023-01-01")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiters_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    # Apply the non-breaking changes.
-    context.apply(plan)
-
-    # Make a non-breaking change upstream from the previously modified model.
-    model = context.get_model("sushi.waiter_revenue_by_day")
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)))
-    waiter_revenue_by_day_snapshot = context.get_snapshot(
-        "sushi.waiter_revenue_by_day", raise_if_missing=True
-    )
-    top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build()
-    assert len(plan.new_snapshots) == 2
-    assert (
-        plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert plan.start == to_timestamp("2023-01-01")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    # Apply the upstream non-breaking changes.
-    context.apply(plan)
-    assert not context.plan_builder("dev", skip_tests=True).build().requires_backfill
-
-    # Deploy everything to prod.
-    plan = context.plan_builder("prod", skip_tests=True, enable_preview=False).build()
-    assert plan.start == to_timestamp("2023-01-01")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiters_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-        SnapshotIntervals(
-            snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    context.apply(plan)
-    assert (
-        not context.plan_builder("prod", skip_tests=True, enable_preview=False)
-        .build()
-        .requires_backfill
-    )
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_changes_downstream_of_indirect_non_breaking_snapshot_without_intervals(
-    init_and_plan_context: t.Callable,
-):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    # Make a breaking change first but don't backfill it
-    model = context.get_model("sushi.orders")
-    model = model.copy(update={"stamp": "force new version"})
-    context.upsert_model(model)
-    plan_builder = context.plan_builder(
-        "dev", skip_backfill=True, skip_tests=True, no_auto_categorization=True
-    )
-    plan_builder.set_choice(context.get_snapshot(model), SnapshotChangeCategory.BREAKING)
-    context.apply(plan_builder.build())
-
-    # Now make a non-breaking change to the same snapshot.
-    model = model.copy(update={"stamp": "force another new version"})
-    context.upsert_model(model)
-    plan_builder = context.plan_builder(
-        "dev", skip_backfill=True, skip_tests=True, no_auto_categorization=True
-    )
-    plan_builder.set_choice(context.get_snapshot(model), SnapshotChangeCategory.NON_BREAKING)
-    context.apply(plan_builder.build())
-
-    # Now make a change to a model downstream of the above model.
-    downstream_model = context.get_model("sushi.top_waiters")
-    downstream_model = downstream_model.copy(update={"stamp": "yet another new version"})
-    context.upsert_model(downstream_model)
-    plan = context.plan_builder("dev", skip_tests=True).build()
-
-    # If the parent is not representative then the child cannot be deployable
-    deployability_index = plan.deployability_index
-    assert not deployability_index.is_representative(
-        context.get_snapshot("sushi.waiter_revenue_by_day")
-    )
-    assert not deployability_index.is_deployable(context.get_snapshot("sushi.top_waiters"))
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC", tick=True)
-def test_metadata_change_after_forward_only_results_in_migration(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    # Make a forward-only change
-    model = context.get_model("sushi.waiter_revenue_by_day")
-    model = model.copy(update={"kind": model.kind.copy(update={"forward_only": True})})
-    model = add_projection_to_model(t.cast(SqlModel, model))
-    context.upsert_model(model)
-    plan = context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True)
-    assert len(plan.new_snapshots) == 2
-    assert all(s.is_forward_only for s in plan.new_snapshots)
-
-    # Follow-up with a metadata change in the same environment
-    model = model.copy(update={"owner": "new_owner"})
-    context.upsert_model(model)
-    plan = context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True)
-    assert len(plan.new_snapshots) == 2
-    assert all(s.change_category == SnapshotChangeCategory.METADATA for s in plan.new_snapshots)
-
-    # Deploy the latest change to prod
-    context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True)
-
-    # Check that the new column was added in prod
-    columns = context.engine_adapter.columns("sushi.waiter_revenue_by_day")
-    assert "one" in columns
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_indirect_non_breaking_downstream_of_forward_only(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    # Make sushi.orders a forward-only model.
-    forward_only_model = context.get_model("sushi.orders")
-    updated_model_kind = forward_only_model.kind.copy(update={"forward_only": True})
-    forward_only_model = forward_only_model.copy(
-        update={"stamp": "force new version", "kind": updated_model_kind}
-    )
-    context.upsert_model(forward_only_model)
-    forward_only_snapshot = context.get_snapshot(forward_only_model, raise_if_missing=True)
-
-    non_breaking_model = context.get_model("sushi.waiter_revenue_by_day")
-    non_breaking_model = non_breaking_model.copy(update={"start": "2023-01-01"})
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, non_breaking_model)))
-    non_breaking_snapshot = context.get_snapshot(non_breaking_model, raise_if_missing=True)
-    top_waiter_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan = context.plan_builder(
-        "dev",
-        skip_tests=True,
-        enable_preview=False,
-        categorizer_config=CategorizerConfig.all_full(),
-    ).build()
-    assert (
-        plan.context_diff.snapshots[forward_only_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert plan.context_diff.snapshots[forward_only_snapshot.snapshot_id].is_forward_only
-    assert not plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].is_forward_only
-    assert not plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].is_forward_only
-
-    assert plan.start == to_timestamp("2023-01-01")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiter_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-        SnapshotIntervals(
-            snapshot_id=non_breaking_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    context.apply(plan)
-    assert (
-        not context.plan_builder("dev", skip_tests=True, enable_preview=False)
-        .build()
-        .requires_backfill
-    )
-
-    # Deploy everything to prod.
-    plan = context.plan_builder("prod", skip_tests=True).build()
-    assert plan.start == to_timestamp("2023-01-01")
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=top_waiter_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-        SnapshotIntervals(
-            snapshot_id=non_breaking_snapshot.snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        ),
-    ]
-
-    context.apply(plan)
-    assert (
-        not context.plan_builder("prod", skip_tests=True, enable_preview=False)
-        .build()
-        .requires_backfill
-    )
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_breaking_only_impacts_immediate_children(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-    context.upsert_model(context.get_model("sushi.top_waiters").copy(update={"kind": FullKind()}))
-    context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True)
-
-    breaking_model = context.get_model("sushi.orders")
-    breaking_model = breaking_model.copy(update={"stamp": "force new version"})
-    context.upsert_model(breaking_model)
-    breaking_snapshot = context.get_snapshot(breaking_model, raise_if_missing=True)
-
-    non_breaking_model = context.get_model("sushi.waiter_revenue_by_day")
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, non_breaking_model)))
-    non_breaking_snapshot = context.get_snapshot(non_breaking_model, raise_if_missing=True)
-    top_waiter_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True)
-
-    plan_builder = context.plan_builder("dev", skip_tests=True, enable_preview=False)
-    plan_builder.set_choice(breaking_snapshot, SnapshotChangeCategory.BREAKING)
-    plan = plan_builder.build()
-    assert (
-        plan.context_diff.snapshots[breaking_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-    assert plan.start == to_timestamp("2023-01-01")
-    assert not any(i.snapshot_id == top_waiter_snapshot.snapshot_id for i in plan.missing_intervals)
-
-    context.apply(plan)
-    assert (
-        not context.plan_builder("dev", skip_tests=True, enable_preview=False)
-        .build()
-        .requires_backfill
-    )
-
-    # Deploy everything to prod.
-    plan = context.plan_builder("prod", skip_tests=True).build()
-    assert not plan.missing_intervals
-
-    context.apply(plan)
-    assert (
-        not context.plan_builder("prod", skip_tests=True, enable_preview=False)
-        .build()
-        .requires_backfill
-    )
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_run_with_select_models(
-    init_and_plan_context: t.Callable,
-):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    with time_machine.travel("2023-01-09 00:00:00 UTC"):
-        assert context.run(select_models=["*waiter_revenue_by_day"])
-
-        snapshots = context.state_sync.state_sync.get_snapshots(context.snapshots.values())
-        # Only waiter_revenue_by_day and its parents should be backfilled up to 2023-01-09.
-        assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == {
-            '"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."order_items"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."orders"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."items"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-08"),
-            '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-08"),
-            '"memory"."sushi"."latest_order"': to_timestamp("2023-01-08"),
-            '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"),
-            '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-08"),
-            '"memory"."sushi"."marketing"': to_timestamp("2023-01-08"),
-            '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-08"),
-            '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-08"),
-            '"memory"."raw"."demographics"': to_timestamp("2023-01-08"),
-            "assert_item_price_above_zero": to_timestamp("2023-01-08"),
-            '"memory"."sushi"."active_customers"': to_timestamp("2023-01-08"),
-            '"memory"."sushi"."customers"': to_timestamp("2023-01-08"),
-            '"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-08"),
-            '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-08"),
-        }
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_seed_model_promote_to_prod_after_dev(
-    init_and_plan_context: t.Callable,
-):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    with open(context.path / "seeds" / "waiter_names.csv", "a") as f:
-        f.write("\n10,New Waiter")
-
-    context.load()
-
-    waiter_names_snapshot = context.get_snapshot("sushi.waiter_names")
-    plan = context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True)
-    assert waiter_names_snapshot.snapshot_id in plan.directly_modified
-
-    # Trigger a metadata change to reuse the previous version
-    waiter_names_model = waiter_names_snapshot.model.copy(
-        update={"description": "Updated description"}
-    )
-    context.upsert_model(waiter_names_model)
-    context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True)
-
-    # Promote all changes to prod
-    waiter_names_snapshot = context.get_snapshot("sushi.waiter_names")
-    plan = context.plan_builder("prod", skip_tests=True).build()
-    # Clear the cache to source the dehydrated model instance from the state
-    context.clear_caches()
-    context.apply(plan)
-
-    assert (
-        context.engine_adapter.fetchone("SELECT COUNT(*) FROM sushi.waiter_names WHERE id = 10")[0]
-        == 1
-    )
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_plan_with_run(
-    init_and_plan_context: t.Callable,
-):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    model = context.get_model("sushi.waiter_revenue_by_day")
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)))
-
-    with time_machine.travel("2023-01-09 00:00:00 UTC"):
-        plan = context.plan(run=True)
-        assert plan.has_changes
-        assert plan.missing_intervals
-
-        context.apply(plan)
-
-        snapshots = context.state_sync.state_sync.get_snapshots(context.snapshots.values())
-        assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == {
-            '"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."order_items"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."orders"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."items"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."latest_order"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"),
-            '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."marketing"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-09"),
-            '"memory"."raw"."demographics"': to_timestamp("2023-01-09"),
-            "assert_item_price_above_zero": to_timestamp("2023-01-09"),
-            '"memory"."sushi"."active_customers"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."customers"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-09"),
-            '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-09"),
-        }
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_plan_ignore_cron(
-    init_and_plan_context: t.Callable,
-):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    expressions = d.parse(
-        f"""
-        MODEL (
-            name memory.sushi.test_allow_partials,
-            kind INCREMENTAL_UNMANAGED,
-            allow_partials true,
-            start '2023-01-01',
-        );
-
-        SELECT @end_ts AS end_ts
-        """
-    )
-    model = load_sql_based_model(expressions)
-
-    context.upsert_model(model)
-    context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True)
-
-    assert (
-        context.engine_adapter.fetchone("SELECT MAX(end_ts) FROM memory.sushi.test_allow_partials")[
-            0
-        ]
-        == "2023-01-07 23:59:59.999999"
-    )
-
-    plan_no_ignore_cron = context.plan_builder(
-        "prod", run=True, ignore_cron=False, skip_tests=True
-    ).build()
-    assert not plan_no_ignore_cron.missing_intervals
-
-    plan = context.plan_builder("prod", run=True, ignore_cron=True, skip_tests=True).build()
-    assert plan.missing_intervals == [
-        SnapshotIntervals(
-            snapshot_id=context.get_snapshot(model, raise_if_missing=True).snapshot_id,
-            intervals=[
-                (to_timestamp("2023-01-08"), to_timestamp("2023-01-08 15:00:00")),
-            ],
-        )
-    ]
-    context.apply(plan)
-
-    assert (
-        context.engine_adapter.fetchone("SELECT MAX(end_ts) FROM memory.sushi.test_allow_partials")[
-            0
-        ]
-        == "2023-01-08 14:59:59.999999"
-    )
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_run_respects_excluded_transitive_dependencies(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    # Graph: C <- B <- A
-    # B is a transitive dependency linking A and C
-    # Note that the alphabetical ordering of the model names is intentional and helps
-    # surface the problem
-    expressions_a = d.parse(
-        f"""
-        MODEL (
-            name memory.sushi.test_model_c,
-            kind FULL,
-            allow_partials true,
-            cron '@hourly',
); - - SELECT @execution_ts AS execution_ts - """ - ) - model_c = load_sql_based_model(expressions_a) - context.upsert_model(model_c) - - # A VIEW model with no partials allowed and a daily cron instead of hourly. - expressions_b = d.parse( - f""" - MODEL ( - name memory.sushi.test_model_b, - kind VIEW, - allow_partials false, - cron '@daily', - ); - - SELECT * FROM memory.sushi.test_model_c - """ - ) - model_b = load_sql_based_model(expressions_b) - context.upsert_model(model_b) - - expressions_a = d.parse( - f""" - MODEL ( - name memory.sushi.test_model_a, - kind FULL, - allow_partials true, - cron '@hourly', - ); - - SELECT * FROM memory.sushi.test_model_b - """ - ) - model_a = load_sql_based_model(expressions_a) - context.upsert_model(model_a) - - context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) - assert ( - context.fetchdf("SELECT execution_ts FROM memory.sushi.test_model_c")["execution_ts"].iloc[ - 0 - ] - == "2023-01-08 15:00:00" - ) - - with time_machine.travel("2023-01-08 17:00:00 UTC", tick=False): - context.run( - "prod", - select_models=["*test_model_c", "*test_model_a"], - no_auto_upstream=True, - ignore_cron=True, - ) - assert ( - context.fetchdf("SELECT execution_ts FROM memory.sushi.test_model_a")[ - "execution_ts" - ].iloc[0] - == "2023-01-08 17:00:00" - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_run_with_select_models_no_auto_upstream( - init_and_plan_context: t.Callable, -): - context, _ = init_and_plan_context("examples/sushi") - - model = context.get_model("sushi.waiter_revenue_by_day") - model = SqlModel.parse_obj({**model.dict(), "audits": []}) - context.upsert_model(model) - - context.plan("prod", no_prompts=True, skip_tests=True, auto_apply=True) - - with time_machine.travel("2023-01-09 00:00:00 UTC"): - assert context.run(select_models=["*waiter_revenue_by_day"], no_auto_upstream=True) - - snapshots = context.state_sync.state_sync.get_snapshots(context.snapshots.values()) - # Only waiter_revenue_by_day should be backfilled up to 2023-01-09. 
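-    # Aside (illustrative): contrast with test_run_with_select_models above --
-    # without no_auto_upstream=True the same selector also advances the model's
-    # upstream chain, e.g.:
-    #
-    #     context.run(select_models=["*waiter_revenue_by_day"])
-    #     # orders, order_items and items advance to 2023-01-09 as well
-    #
-    # With no_auto_upstream=True, only the selected model itself moves forward.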
- assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == { - '"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"), - '"memory"."sushi"."order_items"': to_timestamp("2023-01-08"), - '"memory"."sushi"."orders"': to_timestamp("2023-01-08"), - '"memory"."sushi"."items"': to_timestamp("2023-01-08"), - '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-08"), - '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-08"), - '"memory"."sushi"."latest_order"': to_timestamp("2023-01-08"), - '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"), - '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-08"), - '"memory"."sushi"."marketing"': to_timestamp("2023-01-08"), - '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-08"), - '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-08"), - '"memory"."raw"."demographics"': to_timestamp("2023-01-08"), - "assert_item_price_above_zero": to_timestamp("2023-01-08"), - '"memory"."sushi"."active_customers"': to_timestamp("2023-01-08"), - '"memory"."sushi"."customers"': to_timestamp("2023-01-08"), - '"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-08"), - '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-08"), - } - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_select_models(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Modify 2 models. - model = context.get_model("sushi.waiter_revenue_by_day") - kwargs = { - **model.dict(), - # Make a breaking change. - "query": model.query.order_by("waiter_id"), # type: ignore - } - context.upsert_model(SqlModel.parse_obj(kwargs)) - - model = context.get_model("sushi.customer_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - expected_intervals = [ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ] - - waiter_revenue_by_day_snapshot_id = context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ).snapshot_id - - # Select one of the modified models. - plan_builder = context.plan_builder( - "dev", select_models=["*waiter_revenue_by_day"], skip_tests=True - ) - snapshot = plan_builder._context_diff.snapshots[waiter_revenue_by_day_snapshot_id] - plan_builder.set_choice(snapshot, SnapshotChangeCategory.BREAKING) - plan = plan_builder.build() - - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=waiter_revenue_by_day_snapshot_id, - intervals=expected_intervals, - ), - ] - - context.apply(plan) - - dev_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" - ) - assert len(dev_df) == 7 - - # Make sure that we only create a view for the selected model. - schema_objects = context.engine_adapter.get_data_objects("sushi__dev") - assert len(schema_objects) == 1 - assert schema_objects[0].name == "waiter_revenue_by_day" - - # Validate the other modified model. 
- assert not context.get_snapshot("sushi.customer_revenue_by_day").change_category - assert not context.get_snapshot("sushi.customer_revenue_by_day").version - - # Validate the downstream model. - assert not context.engine_adapter.table_exists( - context.get_snapshot("sushi.top_waiters").table_name() - ) - assert not context.engine_adapter.table_exists( - context.get_snapshot("sushi.top_waiters").table_name(False) - ) - - # Make sure that tables are created when deploying to prod. - plan = context.plan("prod", skip_tests=True) - context.apply(plan) - assert context.engine_adapter.table_exists( - context.get_snapshot("sushi.top_waiters").table_name() - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_select_unchanged_model_for_backfill(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Modify 2 models. - model = context.get_model("sushi.waiter_revenue_by_day") - kwargs = { - **model.dict(), - # Make a breaking change. - "query": d.parse_one( - f"{model.query.sql(dialect='duckdb')} ORDER BY waiter_id", dialect="duckdb" - ), - } - context.upsert_model(SqlModel.parse_obj(kwargs)) - - model = context.get_model("sushi.customer_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - expected_intervals = [ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ] - - waiter_revenue_by_day_snapshot_id = context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ).snapshot_id - - # Select one of the modified models. - plan_builder = context.plan_builder( - "dev", select_models=["*waiter_revenue_by_day"], skip_tests=True - ) - snapshot = plan_builder._context_diff.snapshots[waiter_revenue_by_day_snapshot_id] - plan_builder.set_choice(snapshot, SnapshotChangeCategory.BREAKING) - plan = plan_builder.build() - - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=waiter_revenue_by_day_snapshot_id, - intervals=expected_intervals, - ), - ] - - context.apply(plan) - - # Make sure that we only create a view for the selected model. - schema_objects = context.engine_adapter.get_data_objects("sushi__dev") - assert {o.name for o in schema_objects} == {"waiter_revenue_by_day"} - - # Now select a model downstream from the previously modified one in order to backfill it. - plan = context.plan_builder("dev", select_models=["*top_waiters"], skip_tests=True).build() - - assert not plan.has_changes - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=context.get_snapshot( - "sushi.top_waiters", raise_if_missing=True - ).snapshot_id, - intervals=expected_intervals, - ), - ] - - context.apply(plan) - - # Make sure that a view has been created for the downstream selected model. 
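-    # Aside (illustrative): an applied dev plan only creates virtual-layer views
-    # for the snapshots it actually processed, so inspecting the dev schema
-    # reveals which models the plan touched:
-    #
-    #     schema_objects = context.engine_adapter.get_data_objects("sushi__dev")
-    #     {o.name for o in schema_objects}  # grows as more models are backfilled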
- schema_objects = context.engine_adapter.get_data_objects("sushi__dev") - assert {o.name for o in schema_objects} == {"waiter_revenue_by_day", "top_waiters"} - - -@time_machine.travel("2023-01-08 00:00:00 UTC") -def test_snapshot_triggers(init_and_plan_context: t.Callable, mocker: MockerFixture): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # auto-restatement triggers - orders = context.get_model("sushi.orders") - orders_kind = { - **orders.kind.dict(), - "auto_restatement_cron": "@hourly", - } - orders_kwargs = { - **orders.dict(), - "kind": orders_kind, - } - context.upsert_model(PythonModel.parse_obj(orders_kwargs)) - - order_items = context.get_model("sushi.order_items") - order_items_kind = { - **order_items.kind.dict(), - "auto_restatement_cron": "@hourly", - } - order_items_kwargs = { - **order_items.dict(), - "kind": order_items_kind, - } - context.upsert_model(PythonModel.parse_obj(order_items_kwargs)) - - waiter_revenue_by_day = context.get_model("sushi.waiter_revenue_by_day") - waiter_revenue_by_day_kind = { - **waiter_revenue_by_day.kind.dict(), - "auto_restatement_cron": "@hourly", - } - waiter_revenue_by_day_kwargs = { - **waiter_revenue_by_day.dict(), - "kind": waiter_revenue_by_day_kind, - } - context.upsert_model(SqlModel.parse_obj(waiter_revenue_by_day_kwargs)) - - context.plan(auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full()) - - scheduler = context.scheduler() - - import sqlmesh - - spy = mocker.spy(sqlmesh.core.scheduler.Scheduler, "run_merged_intervals") - - with time_machine.travel("2023-01-09 00:00:01 UTC"): - scheduler.run( - environment=c.PROD, - start="2023-01-01", - auto_restatement_enabled=True, - ) - - assert spy.called - - actual_triggers = spy.call_args.kwargs["auto_restatement_triggers"] - actual_triggers = {k: v for k, v in actual_triggers.items() if v} - assert len(actual_triggers) == 12 - - for id, trigger in actual_triggers.items(): - model_name = id.name.replace('"memory"."sushi".', "").replace('"', "") - auto_restatement_triggers = [ - t.name.replace('"memory"."sushi".', "").replace('"', "") for t in trigger - ] - - if model_name in ("orders", "order_items", "waiter_revenue_by_day"): - assert auto_restatement_triggers == [model_name] - elif model_name in ("customer_revenue_lifetime", "customer_revenue_by_day"): - assert sorted(auto_restatement_triggers) == sorted(["orders", "order_items"]) - elif model_name == "top_waiters": - assert auto_restatement_triggers == ["waiter_revenue_by_day"] - else: - assert auto_restatement_triggers == ["orders"] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_max_interval_end_per_model_not_applied_when_end_is_provided( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - with time_machine.travel("2023-01-09 00:00:00 UTC"): - context.run() - - plan = context.plan_builder( - restate_models=["*"], start="2023-01-09", end="2023-01-09" - ).build() - context.apply(plan) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_select_models_for_backfill(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - expected_intervals = [ - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ] - - plan = context.plan_builder( - "dev", backfill_models=["+*waiter_revenue_by_day"], skip_tests=True - ).build() - - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=context.get_snapshot("sushi.items", 
raise_if_missing=True).snapshot_id, - intervals=expected_intervals, - ), - SnapshotIntervals( - snapshot_id=context.get_snapshot( - "sushi.order_items", raise_if_missing=True - ).snapshot_id, - intervals=expected_intervals, - ), - SnapshotIntervals( - snapshot_id=context.get_snapshot("sushi.orders", raise_if_missing=True).snapshot_id, - intervals=expected_intervals, - ), - SnapshotIntervals( - snapshot_id=context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ).snapshot_id, - intervals=expected_intervals, - ), - ] - - context.apply(plan) - - dev_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" - ) - assert len(dev_df) == 1 - - schema_objects = context.engine_adapter.get_data_objects("sushi__dev") - assert {o.name for o in schema_objects} == { - "items", - "order_items", - "orders", - "waiter_revenue_by_day", - } - - assert not context.engine_adapter.table_exists( - context.get_snapshot("sushi.customer_revenue_by_day").table_name() - ) - - # Make sure that tables are created when deploying to prod. - plan = context.plan("prod") - context.apply(plan) - assert context.engine_adapter.table_exists( - context.get_snapshot("sushi.customer_revenue_by_day").table_name() - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_dbt_select_star_is_directly_modified(sushi_test_dbt_context: Context): - context = sushi_test_dbt_context - - model = context.get_model("sushi.simple_model_a") - context.upsert_model( - model, - query_=ParsableSql(sql="SELECT 1 AS a, 2 AS b"), - ) - - snapshot_a_id = context.get_snapshot("sushi.simple_model_a").snapshot_id # type: ignore - snapshot_b_id = context.get_snapshot("sushi.simple_model_b").snapshot_id # type: ignore - - plan = context.plan_builder("dev", skip_tests=True).build() - assert plan.directly_modified == {snapshot_a_id, snapshot_b_id} - assert {i.snapshot_id for i in plan.missing_intervals} == {snapshot_a_id, snapshot_b_id} - - assert plan.snapshots[snapshot_a_id].change_category == SnapshotChangeCategory.NON_BREAKING - assert plan.snapshots[snapshot_b_id].change_category == SnapshotChangeCategory.NON_BREAKING - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_dbt_is_incremental_table_is_missing(sushi_test_dbt_context: Context): - context = sushi_test_dbt_context - - model = context.get_model("sushi.waiter_revenue_by_day_v2") - model = model.copy(update={"kind": IncrementalUnmanagedKind(), "start": "2023-01-01"}) - context.upsert_model(model) - context._standalone_audits["test_top_waiters"].start = "2023-01-01" - - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - - snapshot = context.get_snapshot("sushi.waiter_revenue_by_day_v2") - assert snapshot - - # Manually drop the table - context.engine_adapter.drop_table(snapshot.table_name()) - - context.snapshot_evaluator.evaluate( - snapshot, - start="2023-01-01", - end="2023-01-08", - execution_time="2023-01-08 15:00:00", - snapshots={s.name: s for s in context.snapshots.values()}, - deployability_index=DeployabilityIndex.all_deployable(), - ) - - # Make sure the table was recreated - assert context.engine_adapter.table_exists(snapshot.table_name()) - - -def test_model_attr(sushi_test_dbt_context: Context, assert_exp_eq): - context = sushi_test_dbt_context - model = context.get_model("sushi.top_waiters") - assert_exp_eq( - model.render_query(), - """ - SELECT - CAST("waiter_id" AS INT) AS "waiter_id", - CAST("revenue" AS DOUBLE) AS "revenue", - 3 AS "model_columns" - 
FROM "memory"."sushi"."waiter_revenue_by_day_v2" AS "waiter_revenue_by_day_v2" - WHERE - "ds" = ( - SELECT - MAX("ds") - FROM "memory"."sushi"."waiter_revenue_by_day_v2" AS "waiter_revenue_by_day_v2" - ) - ORDER BY - "revenue" DESC NULLS FIRST - LIMIT 10 - """, - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_incremental_by_partition(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - source_name = "raw.test_incremental_by_partition" - model_name = "memory.sushi.test_incremental_by_partition" - - expressions = d.parse( - f""" - MODEL ( - name {model_name}, - kind INCREMENTAL_BY_PARTITION (disable_restatement false), - partitioned_by [key], - allow_partials true, - start '2023-01-07', - ); - - SELECT key, value FROM {source_name}; - """ - ) - model = load_sql_based_model(expressions) - context.upsert_model(model) - - context.engine_adapter.ctas( - source_name, - d.parse_one("SELECT 'key_a' AS key, 1 AS value"), - ) - - context.plan(auto_apply=True, no_prompts=True) - assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ - ("key_a", 1), - ] - - context.engine_adapter.replace_query( - source_name, - d.parse_one("SELECT 'key_b' AS key, 1 AS value"), - ) - context.run(ignore_cron=True) - assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ - ("key_a", 1), - ("key_b", 1), - ] - - context.engine_adapter.replace_query( - source_name, - d.parse_one("SELECT 'key_a' AS key, 2 AS value"), - ) - # Run 1 minute later. - with time_machine.travel("2023-01-08 15:01:00 UTC"): - context.run(ignore_cron=True) - assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ - ("key_b", 1), - ("key_a", 2), - ] - - # model should fully refresh on restatement - context.engine_adapter.replace_query( - source_name, - d.parse_one("SELECT 'key_c' AS key, 3 AS value"), - ) - context.plan(auto_apply=True, no_prompts=True, restate_models=[model_name]) - assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ - ("key_c", 3), - ] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_custom_materialization(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - custom_insert_called = False - - class CustomFullMaterialization(CustomMaterialization): - NAME = "test_custom_full" - - def insert( - self, - table_name: str, - query_or_df: QueryOrDF, - model: Model, - is_first_insert: bool, - render_kwargs: t.Dict[str, t.Any], - **kwargs: t.Any, - ) -> None: - nonlocal custom_insert_called - custom_insert_called = True - - self._replace_query_for_model(model, table_name, query_or_df, render_kwargs) - - model = context.get_model("sushi.top_waiters") - kwargs = { - **model.dict(), - # Make a breaking change. - "kind": dict(name="CUSTOM", materialization="test_custom_full"), - } - context.upsert_model(SqlModel.parse_obj(kwargs)) - - context.plan(auto_apply=True, no_prompts=True) - - assert custom_insert_called - - -# needs to be defined at the top level. 
If it's defined within the test body,
-# adding to the snapshot cache fails with: AttributeError: Can't pickle local object
-class TestCustomKind(CustomKind):
-    __test__ = False  # prevent pytest warning since this isn't a class containing tests
-
-    @property
-    def custom_property(self) -> str:
-        return validate_string(self.materialization_properties.get("custom_property"))
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_custom_materialization_with_custom_kind(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    custom_insert_calls = []
-
-    class CustomFullMaterialization(CustomMaterialization[TestCustomKind]):
-        NAME = "test_custom_full_with_custom_kind"
-
-        def insert(
-            self,
-            table_name: str,
-            query_or_df: QueryOrDF,
-            model: Model,
-            is_first_insert: bool,
-            render_kwargs: t.Dict[str, t.Any],
-            **kwargs: t.Any,
-        ) -> None:
-            assert isinstance(model.kind, TestCustomKind)
-
-            nonlocal custom_insert_calls
-            custom_insert_calls.append(model.kind.custom_property)
-
-            self._replace_query_for_model(model, table_name, query_or_df, render_kwargs)
-
-    model = context.get_model("sushi.top_waiters")
-    kwargs = {
-        **model.dict(),
-        # Make a breaking change.
-        "kind": dict(
-            name="CUSTOM",
-            materialization="test_custom_full_with_custom_kind",
-            materialization_properties={"custom_property": "pytest"},
-        ),
-    }
-    context.upsert_model(SqlModel.parse_obj(kwargs))
-
-    context.plan(auto_apply=True)
-
-    assert custom_insert_calls == ["pytest"]
-
-    # no changes
-    context.plan(auto_apply=True)
-
-    assert custom_insert_calls == ["pytest"]
-
-    # change a property on the custom kind, breaking change
-    kwargs["kind"]["materialization_properties"]["custom_property"] = "some value"
-    context.upsert_model(SqlModel.parse_obj(kwargs))
-    context.plan(auto_apply=True)
-
-    assert custom_insert_calls == ["pytest", "some value"]
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_indirect_non_breaking_view_model_non_representative_snapshot(
-    init_and_plan_context: t.Callable,
-):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    # Forward-only parent
-    forward_only_model_name = "memory.sushi.test_forward_only_model"
-    forward_only_model_expressions = d.parse(
-        f"""
-        MODEL (
-            name {forward_only_model_name},
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ds,
-                forward_only true,
-            ),
-        );
-
-        SELECT '2023-01-01' AS ds, 'value' AS value;
-        """
-    )
-    forward_only_model = load_sql_based_model(forward_only_model_expressions)
-    assert forward_only_model.forward_only
-    context.upsert_model(forward_only_model)
-
-    # FULL downstream model.
-    full_downstream_model_name = "memory.sushi.test_full_downstream_model"
-    full_downstream_model_expressions = d.parse(
-        f"""
-        MODEL (
-            name {full_downstream_model_name},
-            kind FULL,
-        );
-
-        SELECT ds, value FROM {forward_only_model_name};
-        """
-    )
-    full_downstream_model = load_sql_based_model(full_downstream_model_expressions)
-    context.upsert_model(full_downstream_model)
-
-    # VIEW downstream of the previous FULL model.
-    view_downstream_model_name = "memory.sushi.test_view_downstream_model"
-    view_downstream_model_expressions = d.parse(
-        f"""
-        MODEL (
-            name {view_downstream_model_name},
-            kind VIEW,
-        );
-
-        SELECT ds, value FROM {full_downstream_model_name};
-        """
-    )
-    view_downstream_model = load_sql_based_model(view_downstream_model_expressions)
-    context.upsert_model(view_downstream_model)
-
-    # Apply the initial plan with all 3 models.
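-    # Aside (illustrative): the dependency chain assembled above is
-    #
-    #     test_forward_only_model --> test_full_downstream_model --> test_view_downstream_model
-    #     (incremental, forward-only)  (FULL)                        (VIEW)
-    #
-    # so categorization decisions made upstream ripple rightward through it.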
- context.plan(auto_apply=True, no_prompts=True) - - # Make a change to the forward-only model and apply it in dev. - context.upsert_model(add_projection_to_model(t.cast(SqlModel, forward_only_model))) - forward_only_model_snapshot_id = context.get_snapshot(forward_only_model_name).snapshot_id - full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id - view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id - dev_plan = context.plan("dev", auto_apply=True, no_prompts=True, enable_preview=False) - assert ( - dev_plan.snapshots[forward_only_model_snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - dev_plan.snapshots[full_downstream_model_snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert ( - dev_plan.snapshots[view_downstream_model_snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert not dev_plan.missing_intervals - - # Make a follow-up breaking change to the downstream full model. - new_full_downstream_model_expressions = d.parse( - f""" - MODEL ( - name {full_downstream_model_name}, - kind FULL, - ); - - SELECT ds, 'new_value' AS value FROM {forward_only_model_name}; - """ - ) - new_full_downstream_model = load_sql_based_model(new_full_downstream_model_expressions) - context.upsert_model(new_full_downstream_model) - full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id - view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id - dev_plan = context.plan( - "dev", - categorizer_config=CategorizerConfig.all_full(), - auto_apply=True, - no_prompts=True, - enable_preview=False, - ) - assert ( - dev_plan.snapshots[full_downstream_model_snapshot_id].change_category - == SnapshotChangeCategory.BREAKING - ) - assert ( - dev_plan.snapshots[view_downstream_model_snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_BREAKING - ) - assert len(dev_plan.missing_intervals) == 2 - assert dev_plan.missing_intervals[0].snapshot_id == full_downstream_model_snapshot_id - assert dev_plan.missing_intervals[1].snapshot_id == view_downstream_model_snapshot_id - - # Check that the representative view hasn't been created yet. - assert not context.engine_adapter.table_exists( - context.get_snapshot(view_downstream_model_name).table_name() - ) - - # Now promote the very first change to prod without promoting the 2nd breaking change. - context.upsert_model(full_downstream_model) - context.plan(auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full()) - - # Finally, make a non-breaking change to the full model in the same dev environment. 
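-    # Aside (illustrative, inferred from the column checks elsewhere in this
-    # suite): add_projection_to_model appends a literal projection (asserted
-    # later as a column named "one"), which the categorizer treats as an
-    # additive, non-breaking change:
-    #
-    #     SELECT ds, 'new_value' AS value FROM ...            -- before
-    #     SELECT ds, 'new_value' AS value, 1 AS one FROM ...  -- after (sketch)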
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, new_full_downstream_model)))
-    full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id
-    view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id
-    dev_plan = context.plan(
-        "dev",
-        categorizer_config=CategorizerConfig.all_full(),
-        auto_apply=True,
-        no_prompts=True,
-        enable_preview=False,
-    )
-    assert (
-        dev_plan.snapshots[full_downstream_model_snapshot_id].change_category
-        == SnapshotChangeCategory.NON_BREAKING
-    )
-    assert (
-        dev_plan.snapshots[view_downstream_model_snapshot_id].change_category
-        == SnapshotChangeCategory.INDIRECT_NON_BREAKING
-    )
-
-    # Deploy changes to prod
-    context.plan("prod", auto_apply=True, no_prompts=True)
-
-    # Check that the representative view has been created.
-    assert context.engine_adapter.table_exists(
-        context.get_snapshot(view_downstream_model_name).table_name()
-    )
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_indirect_non_breaking_view_model_non_representative_snapshot_migration(
-    init_and_plan_context: t.Callable,
-):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    forward_only_model_expr = d.parse(
-        """
-        MODEL (
-            name memory.sushi.forward_only_model,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ds,
-                forward_only TRUE,
-                on_destructive_change 'allow',
-            ),
-        );
-
-        SELECT '2023-01-07' AS ds, 1 AS a;
-        """
-    )
-    forward_only_model = load_sql_based_model(forward_only_model_expr)
-    context.upsert_model(forward_only_model)
-
-    downstream_view_a_expr = d.parse(
-        """
-        MODEL (
-            name memory.sushi.downstream_view_a,
-            kind VIEW,
-        );
-
-        SELECT a from memory.sushi.forward_only_model;
-        """
-    )
-    downstream_view_a = load_sql_based_model(downstream_view_a_expr)
-    context.upsert_model(downstream_view_a)
-
-    downstream_view_b_expr = d.parse(
-        """
-        MODEL (
-            name memory.sushi.downstream_view_b,
-            kind VIEW,
-        );
-
-        SELECT a from memory.sushi.downstream_view_a;
-        """
-    )
-    downstream_view_b = load_sql_based_model(downstream_view_b_expr)
-    context.upsert_model(downstream_view_b)
-
-    context.plan(auto_apply=True, no_prompts=True, skip_tests=True)
-
-    # Make a forward-only change
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, forward_only_model)))
-    # Make a non-breaking change downstream
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, downstream_view_a)))
-
-    context.plan(auto_apply=True, no_prompts=True, skip_tests=True)
-
-    # Make sure the downstream indirect non-breaking view is available in prod
-    count = context.engine_adapter.fetchone("SELECT COUNT(*) FROM memory.sushi.downstream_view_b")[
-        0
-    ]
-    assert count > 0
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-@pytest.mark.parametrize(
-    "parent_a_category,parent_b_category,expected_child_category",
-    [
-        (
-            SnapshotChangeCategory.BREAKING,
-            SnapshotChangeCategory.BREAKING,
-            SnapshotChangeCategory.INDIRECT_BREAKING,
-        ),
-        (
-            SnapshotChangeCategory.NON_BREAKING,
-            SnapshotChangeCategory.NON_BREAKING,
-            SnapshotChangeCategory.INDIRECT_NON_BREAKING,
-        ),
-        (
-            SnapshotChangeCategory.BREAKING,
-            SnapshotChangeCategory.NON_BREAKING,
-            SnapshotChangeCategory.INDIRECT_NON_BREAKING,
-        ),
-        (
-            SnapshotChangeCategory.NON_BREAKING,
-            SnapshotChangeCategory.BREAKING,
-            SnapshotChangeCategory.INDIRECT_BREAKING,
-        ),
-        (
-            SnapshotChangeCategory.NON_BREAKING,
-            SnapshotChangeCategory.METADATA,
-            SnapshotChangeCategory.METADATA,
-        ),
-        (
-            SnapshotChangeCategory.BREAKING,
SnapshotChangeCategory.METADATA, - SnapshotChangeCategory.METADATA, - ), - ( - SnapshotChangeCategory.METADATA, - SnapshotChangeCategory.BREAKING, - SnapshotChangeCategory.INDIRECT_BREAKING, - ), - ( - SnapshotChangeCategory.METADATA, - SnapshotChangeCategory.NON_BREAKING, - SnapshotChangeCategory.INDIRECT_NON_BREAKING, - ), - ( - SnapshotChangeCategory.METADATA, - SnapshotChangeCategory.METADATA, - SnapshotChangeCategory.METADATA, - ), - ], -) -def test_rebase_two_changed_parents( - init_and_plan_context: t.Callable, - parent_a_category: SnapshotChangeCategory, # This change is deployed to prod first - parent_b_category: SnapshotChangeCategory, # This change is deployed to prod second - expected_child_category: SnapshotChangeCategory, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - initial_model_a = context.get_model("sushi.orders") - initial_model_b = context.get_model("sushi.items") - - # Make change A and deploy it to dev_a - context.upsert_model(initial_model_a.name, stamp="1") - plan_builder = context.plan_builder("dev_a", skip_tests=True) - plan_builder.set_choice(context.get_snapshot(initial_model_a.name), parent_a_category) - context.apply(plan_builder.build()) - - # Make change B and deploy it to dev_b - context.upsert_model(initial_model_a) - context.upsert_model(initial_model_b.name, stamp="1") - plan_builder = context.plan_builder("dev_b", skip_tests=True) - plan_builder.set_choice(context.get_snapshot(initial_model_b.name), parent_b_category) - context.apply(plan_builder.build()) - - # Deploy change A to prod - context.upsert_model(initial_model_a.name, stamp="1") - context.upsert_model(initial_model_b) - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - - # Apply change B in addition to A and plan against prod - context.upsert_model(initial_model_b.name, stamp="1") - plan = context.plan_builder("prod", skip_tests=True).build() - - # Validate the category of child snapshots - direct_child_snapshot = plan.snapshots[context.get_snapshot("sushi.order_items").snapshot_id] - assert direct_child_snapshot.change_category == expected_child_category - - indirect_child_snapshot = plan.snapshots[context.get_snapshot("sushi.top_waiters").snapshot_id] - assert indirect_child_snapshot.change_category == expected_child_category - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_unaligned_start_snapshot_with_non_deployable_downstream(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - downstream_model_name = "memory.sushi.customer_max_revenue" - - expressions = d.parse( - f""" - MODEL ( - name {downstream_model_name}, - kind INCREMENTAL_BY_UNIQUE_KEY ( - unique_key customer_id, - forward_only true, - ), - ); - - SELECT - customer_id, MAX(revenue) AS max_revenue - FROM memory.sushi.customer_revenue_lifetime - GROUP BY 1; - """ - ) - - downstream_model = load_sql_based_model(expressions) - assert downstream_model.forward_only - context.upsert_model(downstream_model) - - context.plan(auto_apply=True, no_prompts=True) - - customer_revenue_lifetime_model = context.get_model("sushi.customer_revenue_lifetime") - kwargs = { - **customer_revenue_lifetime_model.dict(), - "name": "memory.sushi.customer_revenue_lifetime_new", - "kind": dict( - name="INCREMENTAL_UNMANAGED" - ), # Make it incremental unmanaged to ensure the depends_on_past behavior. 
- } - context.upsert_model(SqlModel.parse_obj(kwargs)) - context.upsert_model( - downstream_model_name, - query_=ParsableSql( - sql="SELECT customer_id, MAX(revenue) AS max_revenue FROM memory.sushi.customer_revenue_lifetime_new GROUP BY 1" - ), - ) - - plan = context.plan_builder("dev", enable_preview=True).build() - assert {s.name for s in plan.new_snapshots} == { - '"memory"."sushi"."customer_revenue_lifetime_new"', - '"memory"."sushi"."customer_max_revenue"', - } - for snapshot_interval in plan.missing_intervals: - assert not plan.deployability_index.is_deployable(snapshot_interval.snapshot_id) - assert snapshot_interval.intervals[0][0] == to_timestamp("2023-01-07") - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - - assert all( - s.virtual_environment_mode.is_dev_only or not s.is_model or s.is_symbolic - for s in context.snapshots.values() - ) - - # Init prod - context.plan("prod", auto_apply=True, no_prompts=True) - - # Make a change in dev - original_model = context.get_model("sushi.waiter_revenue_by_day") - original_fingerprint = context.get_snapshot(original_model.name).fingerprint - model = original_model.copy( - update={ - "query_": ParsableSql( - sql=original_model.query.order_by("waiter_id").sql(dialect=original_model.dialect) - ) - } - ) - model = add_projection_to_model(t.cast(SqlModel, model)) - context.upsert_model(model) - - plan_dev = context.plan_builder("dev").build() - assert to_timestamp(plan_dev.start) == to_timestamp("2023-01-07") - assert plan_dev.requires_backfill - assert plan_dev.missing_intervals == [ - SnapshotIntervals( - snapshot_id=context.get_snapshot("sushi.top_waiters").snapshot_id, - intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], - ), - SnapshotIntervals( - snapshot_id=context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id, - intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], - ), - ] - assert plan_dev.context_diff.snapshots[context.get_snapshot(model.name).snapshot_id].intervals - assert plan_dev.context_diff.snapshots[ - context.get_snapshot("sushi.top_waiters").snapshot_id - ].intervals - assert plan_dev.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].dev_intervals - assert plan_dev.context_diff.snapshots[ - context.get_snapshot("sushi.top_waiters").snapshot_id - ].dev_intervals - context.apply(plan_dev) - - # Make sure the waiter_revenue_by_day model is a table in prod and a view in dev - table_types_df = context.engine_adapter.fetchdf( - "SELECT table_schema, table_type FROM INFORMATION_SCHEMA.TABLES WHERE table_name = 'waiter_revenue_by_day'" - ) - assert table_types_df.to_dict("records") == [ - {"table_schema": "sushi", "table_type": "BASE TABLE"}, - {"table_schema": "sushi__dev", "table_type": "VIEW"}, - ] - - # Check that the specified dates were backfilled - min_event_date = context.engine_adapter.fetchone( - "SELECT MIN(event_date) FROM sushi__dev.waiter_revenue_by_day" - )[0] - assert min_event_date == to_date("2023-01-07") - - # Make sure the changes are applied without backfill in prod - plan_prod = context.plan_builder("prod").build() - assert not plan_prod.requires_backfill - assert not plan_prod.missing_intervals - context.apply(plan_prod) - assert "one" in context.engine_adapter.columns("sushi.waiter_revenue_by_day") - - # Make sure the revert of a breaking 
change results in a full rebuild
-    context.upsert_model(original_model)
-    assert context.get_snapshot(original_model.name).fingerprint == original_fingerprint
-
-    plan_prod = context.plan_builder(
-        "prod", allow_destructive_models=["sushi.waiter_revenue_by_day"]
-    ).build()
-    assert not plan_prod.requires_backfill
-    assert not plan_prod.missing_intervals
-    context.apply(plan_prod)
-    assert "one" not in context.engine_adapter.columns("sushi.waiter_revenue_by_day")
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_virtual_environment_mode_dev_only_model_kind_change(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context(
-        "examples/sushi", config="test_config_virtual_environment_mode_dev_only"
-    )
-    context.apply(plan)
-
-    # Change to full kind
-    model = context.get_model("sushi.top_waiters")
-    model = model.copy(update={"kind": FullKind()})
-    context.upsert_model(model)
-    prod_plan = context.plan_builder("prod", skip_tests=True).build()
-    assert prod_plan.missing_intervals
-    assert prod_plan.requires_backfill
-    assert not prod_plan.context_diff.snapshots[
-        context.get_snapshot(model.name).snapshot_id
-    ].intervals
-    context.apply(prod_plan)
-    data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"})
-    assert len(data_objects) == 1
-    assert data_objects[0].type == "table"
-
-    # Change back to view
-    model = context.get_model("sushi.top_waiters")
-    model = model.copy(update={"kind": ViewKind()})
-    context.upsert_model(model)
-    prod_plan = context.plan_builder("prod", skip_tests=True).build()
-    assert prod_plan.requires_backfill
-    assert prod_plan.missing_intervals
-    assert not prod_plan.context_diff.snapshots[
-        context.get_snapshot(model.name).snapshot_id
-    ].intervals
-    context.apply(prod_plan)
-    data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"})
-    assert len(data_objects) == 1
-    assert data_objects[0].type == "view"
-
-    # Change to incremental
-    model = context.get_model("sushi.top_waiters")
-    model = model.copy(update={"kind": IncrementalUnmanagedKind()})
-    context.upsert_model(model)
-    prod_plan = context.plan_builder("prod", skip_tests=True).build()
-    assert prod_plan.requires_backfill
-    assert prod_plan.missing_intervals
-    assert not prod_plan.context_diff.snapshots[
-        context.get_snapshot(model.name).snapshot_id
-    ].intervals
-    context.apply(prod_plan)
-    data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"})
-    assert len(data_objects) == 1
-    assert data_objects[0].type == "table"
-
-    # Change back to full
-    model = context.get_model("sushi.top_waiters")
-    model = model.copy(update={"kind": FullKind()})
-    context.upsert_model(model)
-    prod_plan = context.plan_builder("prod", skip_tests=True).build()
-    assert prod_plan.requires_backfill
-    assert prod_plan.missing_intervals
-    assert not prod_plan.context_diff.snapshots[
-        context.get_snapshot(model.name).snapshot_id
-    ].intervals
-    context.apply(prod_plan)
-    data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"})
-    assert len(data_objects) == 1
-    assert data_objects[0].type == "table"
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_virtual_environment_mode_dev_only_model_kind_change_incremental(
-    init_and_plan_context: t.Callable,
-):
-    context, _ = init_and_plan_context(
-        "examples/sushi", config="test_config_virtual_environment_mode_dev_only"
-    )
-
-    forward_only_model_name = "memory.sushi.test_forward_only_model"
-    forward_only_model_expressions = d.parse(
-        f"""
-        MODEL (
-            name
{forward_only_model_name}, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - ), - ); - - SELECT '2023-01-01' AS ds, 'value' AS value; - """ - ) - forward_only_model = load_sql_based_model(forward_only_model_expressions) - forward_only_model = forward_only_model.copy( - update={"virtual_environment_mode": VirtualEnvironmentMode.DEV_ONLY} - ) - context.upsert_model(forward_only_model) - - context.plan("prod", auto_apply=True, no_prompts=True) - - # Change to view - model = context.get_model(forward_only_model_name) - original_kind = model.kind - model = model.copy(update={"kind": ViewKind()}) - context.upsert_model(model) - prod_plan = context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.requires_backfill - assert prod_plan.missing_intervals - assert not prod_plan.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].intervals - context.apply(prod_plan) - data_objects = context.engine_adapter.get_data_objects("sushi", {"test_forward_only_model"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "view" - - model = model.copy(update={"kind": original_kind}) - context.upsert_model(model) - prod_plan = context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.requires_backfill - assert prod_plan.missing_intervals - assert not prod_plan.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].intervals - context.apply(prod_plan) - data_objects = context.engine_adapter.get_data_objects("sushi", {"test_forward_only_model"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "table" - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_model_kind_change_with_follow_up_changes_in_dev( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - context.apply(plan) - - # Make sure the initial state is a view - data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "view" - - # Change to incremental unmanaged kind - model = context.get_model("sushi.top_waiters") - model = model.copy(update={"kind": IncrementalUnmanagedKind()}) - context.upsert_model(model) - dev_plan = context.plan_builder("dev", skip_tests=True).build() - assert dev_plan.missing_intervals - assert dev_plan.requires_backfill - context.apply(dev_plan) - - # Make a follow-up forward-only change - model = add_projection_to_model(t.cast(SqlModel, model)) - context.upsert_model(model) - dev_plan = context.plan_builder("dev", skip_tests=True, forward_only=True).build() - context.apply(dev_plan) - - # Deploy to prod - prod_plan = context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.requires_backfill - assert prod_plan.missing_intervals - assert not prod_plan.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].intervals - context.apply(prod_plan) - data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "table" - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_model_kind_change_manual_categorization( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - context.apply(plan) - - model = 
context.get_model("sushi.top_waiters") - model = model.copy(update={"kind": FullKind()}) - context.upsert_model(model) - dev_plan_builder = context.plan_builder("dev", skip_tests=True, no_auto_categorization=True) - dev_plan_builder.set_choice( - dev_plan_builder._context_diff.snapshots[context.get_snapshot(model.name).snapshot_id], - SnapshotChangeCategory.NON_BREAKING, - ) - dev_plan = dev_plan_builder.build() - assert dev_plan.requires_backfill - assert len(dev_plan.missing_intervals) == 1 - context.apply(dev_plan) - - prod_plan = context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.requires_backfill - assert prod_plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=context.get_snapshot("sushi.top_waiters").snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_seed_model_change( - init_and_plan_context: t.Callable, -): - context, _ = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - context.load() - context.plan("prod", auto_apply=True, no_prompts=True) - - seed_model = context.get_model("sushi.waiter_names") - with open(seed_model.seed_path, "a") as fd: - fd.write("\n123,New Test Name") - - context.load() - seed_model_snapshot = context.get_snapshot("sushi.waiter_names") - plan = context.plan_builder("dev").build() - assert plan.directly_modified == {seed_model_snapshot.snapshot_id} - assert len(plan.missing_intervals) == 2 - context.apply(plan) - - actual_seed_df_in_dev = context.fetchdf("SELECT * FROM sushi__dev.waiter_names WHERE id = 123") - assert actual_seed_df_in_dev.to_dict("records") == [{"id": 123, "name": "New Test Name"}] - actual_seed_df_in_prod = context.fetchdf("SELECT * FROM sushi.waiter_names WHERE id = 123") - assert actual_seed_df_in_prod.empty - - plan = context.plan_builder("prod").build() - assert plan.directly_modified == {seed_model_snapshot.snapshot_id} - assert len(plan.missing_intervals) == 1 - assert plan.missing_intervals[0].snapshot_id == seed_model_snapshot.snapshot_id - context.apply(plan) - - actual_seed_df_in_prod = context.fetchdf("SELECT * FROM sushi.waiter_names WHERE id = 123") - assert actual_seed_df_in_prod.to_dict("records") == [{"id": 123, "name": "New Test Name"}] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_model_change_downstream_of_seed( - init_and_plan_context: t.Callable, -): - """This test covers a scenario when a model downstream of a seed model is modified and explicitly selected - causing an (unhydrated) seed model to sourced from the state. If SQLMesh attempts to create - a table for the unchanged seed model, it will fail because the seed model is not hydrated. 
- """ - context, _ = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - context.load() - context.plan("prod", auto_apply=True, no_prompts=True) - - # Make sure that a different version of the seed model is loaded - seed_model = context.get_model("sushi.waiter_names") - seed_model = seed_model.copy(update={"stamp": "force new version"}) - context.upsert_model(seed_model) - - # Make a change to the downstream model - model = context.get_model("sushi.waiter_as_customer_by_day") - model = model.copy(update={"stamp": "force new version"}) - context.upsert_model(model) - - # It is important to clear the cache so that the hydrated seed model is not sourced from the cache - context.clear_caches() - - # Make sure to use the selector so that the seed model is sourced from the state - plan = context.plan_builder("dev", select_models=[model.name]).build() - assert len(plan.directly_modified) == 1 - assert list(plan.directly_modified)[0].name == model.fqn - assert len(plan.missing_intervals) == 1 - assert plan.missing_intervals[0].snapshot_id.name == model.fqn - - # Make sure there's no error when applying the plan - context.apply(plan) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_model_change_standalone_audit( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - context.apply(plan) - - # Change a model upstream from a standalone audit - model = context.get_model("sushi.items") - model = model.copy(update={"stamp": "force new version"}) - context.upsert_model(model) - - plan = context.plan_builder("prod", skip_tests=True).build() - - # Make sure the standalone audit is among modified - assert ( - context.get_snapshot("assert_item_price_above_zero").snapshot_id - in plan.indirectly_modified[context.get_snapshot("sushi.items").snapshot_id] - ) - - # Make sure there's no error when applying the plan - context.apply(plan) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_seed_model_change_schema( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - context.apply(plan) - - new_csv = [] - with open(context.path / "seeds" / "waiter_names.csv", "r") as fd: - is_header = True - for idx, line in enumerate(fd): - line = line.strip() - if not line: - continue - if is_header: - new_csv.append(line + ",new_column") - is_header = False - else: - new_csv.append(line + f",v{idx}") - - with open(context.path / "seeds" / "waiter_names.csv", "w") as fd: - fd.write("\n".join(new_csv)) - - context.load() - - downstream_model = context.get_model("sushi.waiter_as_customer_by_day") - downstream_model_kind = downstream_model.kind.dict() - downstream_model_kwargs = { - **downstream_model.dict(), - "kind": { - **downstream_model_kind, - "on_destructive_change": "allow", - }, - "audits": [], - # Use the new column - "query": "SELECT '2023-01-07' AS event_date, new_column AS new_column FROM sushi.waiter_names", - } - context.upsert_model(SqlModel.parse_obj(downstream_model_kwargs)) - - context.plan("dev", auto_apply=True, no_prompts=True, skip_tests=True, enable_preview=True) - - assert ( - context.engine_adapter.fetchone( - "SELECT COUNT(*) FROM sushi__dev.waiter_as_customer_by_day" - )[0] - == len(new_csv) - 1 - ) - - # Deploy to prod - context.clear_caches() - 
context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - assert "new_column" in context.engine_adapter.columns("sushi.waiter_as_customer_by_day") - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_restatement_plan_ignores_changes(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - restated_snapshot = context.get_snapshot("sushi.top_waiters") - - # Simulate a change. - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - plan = context.plan_builder(restate_models=["sushi.top_waiters"]).build() - assert plan.snapshots != context.snapshots - - assert not plan.directly_modified - assert not plan.has_changes - assert not plan.new_snapshots - assert plan.requires_backfill - assert plan.restatements == { - restated_snapshot.snapshot_id: (to_timestamp("2023-01-01"), to_timestamp("2023-01-09")) - } - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=restated_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ) - ] - - context.apply(plan) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_restatement_plan_across_environments_snapshot_with_shared_version( - init_and_plan_context: t.Callable, -): - context, _ = init_and_plan_context("examples/sushi") - - # Change kind to incremental unmanaged - model = context.get_model("sushi.waiter_revenue_by_day") - previous_kind = model.kind.copy(update={"forward_only": True}) - assert isinstance(previous_kind, IncrementalByTimeRangeKind) - - model = model.copy( - update={ - "kind": IncrementalUnmanagedKind(), - "physical_version": "pinned_version_12345", - "partitioned_by_": [exp.column("event_date")], - } - ) - context.upsert_model(model) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Make some change and deploy it to both dev and prod environments - model = add_projection_to_model(t.cast(SqlModel, model)) - context.upsert_model(model) - context.plan("dev_a", auto_apply=True, no_prompts=True) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Change the kind back to incremental by time range and deploy to prod - model = model.copy(update={"kind": previous_kind}) - context.upsert_model(model) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Restate the model and verify that the interval hasn't been expanded because of the old snapshot - # with the same version - context.plan( - restate_models=["sushi.waiter_revenue_by_day"], - start="2023-01-06", - end="2023-01-08", - auto_apply=True, - no_prompts=True, - ) - - assert ( - context.fetchdf( - "SELECT COUNT(*) AS cnt FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL AND event_date < '2023-01-06'" - )["cnt"][0] - == 0 - ) - plan = context.plan_builder("prod").build() - assert not plan.missing_intervals - - -def test_restatement_plan_hourly_with_downstream_daily_restates_correct_intervals(tmp_path: Path): - model_a = """ - MODEL ( - name test.a, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - ), - start '2024-01-01 00:00:00', - cron '@hourly' - ); - - 
-        select account_id, ts from test.external_table;
-    """
-
-    model_b = """
-        MODEL (
-            name test.b,
-            kind FULL,
-            cron '@daily'
-        );
-
-        select account_id, ts from test.a;
-    """
-
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-
-    for path, defn in {"a.sql": model_a, "b.sql": model_b}.items():
-        with open(models_dir / path, "w") as f:
-            f.write(defn)
-
-    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
-    ctx = Context(paths=[tmp_path], config=config)
-
-    engine_adapter = ctx.engine_adapter
-    engine_adapter.create_schema("test")
-
-    # source data
-    df = pd.DataFrame(
-        {
-            "account_id": [1001, 1002, 1003, 1004],
-            "ts": [
-                "2024-01-01 00:30:00",
-                "2024-01-01 01:30:00",
-                "2024-01-01 02:30:00",
-                "2024-01-02 00:30:00",
-            ],
-        }
-    )
-    columns_to_types = {
-        "account_id": exp.DataType.build("int"),
-        "ts": exp.DataType.build("timestamp"),
-    }
-    external_table = exp.table_(table="external_table", db="test", quoted=True)
-    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
-    engine_adapter.insert_append(
-        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
-    )
-
-    # plan + apply
-    ctx.plan(auto_apply=True, no_prompts=True)
-
-    def _dates_in_table(table_name: str) -> t.List[str]:
-        return [
-            str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts")
-        ]
-
-    # verify initial state
-    for tbl in ["test.a", "test.b"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-01 01:30:00",
-            "2024-01-01 02:30:00",
-            "2024-01-02 00:30:00",
-        ]
-
-    # restate A
-    engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'")
-    ctx.plan(
-        restate_models=["test.a"],
-        start="2024-01-01 01:00:00",
-        end="2024-01-01 02:00:00",
-        auto_apply=True,
-        no_prompts=True,
-    )
-
-    # verify result
-    for tbl in ["test.a", "test.b"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-01 02:30:00",
-            "2024-01-02 00:30:00",
-        ], f"Table {tbl} wasn't cleared"
-
-    # Put some data
-    df = pd.DataFrame(
-        {
-            "account_id": [1001, 1002, 1003, 1004],
-            "ts": [
-                "2024-01-01 01:30:00",
-                "2024-01-01 23:30:00",
-                "2024-01-02 03:30:00",
-                "2024-01-03 12:30:00",
-            ],
-        }
-    )
-    engine_adapter.replace_query(
-        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
-    )
-
-    # Restate A across a day boundary with the expectation that two day intervals in B are affected
-    ctx.plan(
-        restate_models=["test.a"],
-        start="2024-01-01 02:00:00",
-        end="2024-01-02 04:00:00",
-        auto_apply=True,
-        no_prompts=True,
-    )
-
-    for tbl in ["test.a", "test.b"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",  # present already
-            # "2024-01-01 02:30:00",  # removed in last restatement
-            "2024-01-01 23:30:00",  # added in last restatement
-            "2024-01-02 03:30:00",  # added in last restatement
-        ], f"Table {tbl} wasn't cleared"
-
-
-def test_restatement_plan_respects_disable_restatements(tmp_path: Path):
-    model_a = """
-        MODEL (
-            name test.a,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "ts"
-            ),
-            start '2024-01-01',
-            cron '@daily'
-        );
-
-        select account_id, ts from test.external_table;
-    """
-
-    model_b = """
-        MODEL (
-            name test.b,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "ts",
-                disable_restatement true,
-            ),
-            start '2024-01-01',
-            cron '@daily'
-        );
-
-        select account_id, ts from test.a;
-    """
-
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-
-    for path, defn in {"a.sql": model_a, "b.sql": model_b}.items():
-        with open(models_dir / path, "w") as f:
-            f.write(defn)
-
-    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
-    ctx = Context(paths=[tmp_path], config=config)
-
-    engine_adapter = ctx.engine_adapter
-    engine_adapter.create_schema("test")
-
-    # source data
-    df = pd.DataFrame(
-        {
-            "account_id": [1001, 1002, 1003, 1004],
-            "ts": [
-                "2024-01-01 00:30:00",
-                "2024-01-01 01:30:00",
-                "2024-01-01 02:30:00",
-                "2024-01-02 00:30:00",
-            ],
-        }
-    )
-    columns_to_types = {
-        "account_id": exp.DataType.build("int"),
-        "ts": exp.DataType.build("timestamp"),
-    }
-    external_table = exp.table_(table="external_table", db="test", quoted=True)
-    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
-    engine_adapter.insert_append(
-        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
-    )
-
-    # plan + apply
-    ctx.plan(auto_apply=True, no_prompts=True)
-
-    def _dates_in_table(table_name: str) -> t.List[str]:
-        return [
-            str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts")
-        ]
-
-    def get_snapshot_intervals(snapshot_id):
-        return list(ctx.state_sync.get_snapshots([snapshot_id]).values())[0].intervals
-
-    # verify initial state
-    for tbl in ["test.a", "test.b"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-01 01:30:00",
-            "2024-01-01 02:30:00",
-            "2024-01-02 00:30:00",
-        ]
-
-    # restate A and expect B to be ignored
-    starting_b_intervals = get_snapshot_intervals(ctx.snapshots['"memory"."test"."b"'].snapshot_id)
-    engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'")
-    ctx.plan(
-        restate_models=["test.a"],
-        start="2024-01-01",
-        end="2024-01-02",
-        auto_apply=True,
-        no_prompts=True,
-    )
-
-    # verify that A was changed and B was not
-    assert _dates_in_table("test.a") == [
-        "2024-01-01 00:30:00",
-        "2024-01-01 02:30:00",
-        "2024-01-02 00:30:00",
-    ]
-    assert _dates_in_table("test.b") == [
-        "2024-01-01 00:30:00",
-        "2024-01-01 01:30:00",
-        "2024-01-01 02:30:00",
-        "2024-01-02 00:30:00",
-    ]
-
-    # Verify B intervals were not touched
-    b_intervals = get_snapshot_intervals(ctx.snapshots['"memory"."test"."b"'].snapshot_id)
-    assert starting_b_intervals == b_intervals
-
-
-def test_restatement_plan_clears_correct_intervals_across_environments(tmp_path: Path):
-    model1 = """
-        MODEL (
-            name test.incremental_model,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "date"
-            ),
-            start '2024-01-01',
-            cron '@daily'
-        );
-
-        select account_id, date from test.external_table;
-    """
-
-    model2 = """
-        MODEL (
-            name test.downstream_of_incremental,
-            kind FULL
-        );
-
-        select account_id, date from test.incremental_model;
-    """
-
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-
-    with open(models_dir / "model1.sql", "w") as f:
-        f.write(model1)
-
-    with open(models_dir / "model2.sql", "w") as f:
-        f.write(model2)
-
-    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
-    ctx = Context(paths=[tmp_path], config=config)
-
-    engine_adapter = ctx.engine_adapter
-    engine_adapter.create_schema("test")
-
-    # source data
-    df = pd.DataFrame(
-        {
-            "account_id": [1001, 1002, 1003, 1004, 1005],
-            "name": ["foo", "bar", "baz", "bing", "bong"],
-            "date": ["2024-01-01", "2024-01-02", "2024-01-03", "2024-01-04", "2024-01-05"],
-        }
-    )
-    columns_to_types = {
-        "account_id": exp.DataType.build("int"),
-        "name": exp.DataType.build("varchar"),
-        "date": exp.DataType.build("date"),
-    }
-    external_table = exp.table_(table="external_table", db="test", quoted=True)
-    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
-    engine_adapter.insert_append(
-        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
-    )
-
-    # first, create the prod models
-    ctx.plan(auto_apply=True, no_prompts=True)
-    assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,)
-    assert engine_adapter.fetchone("select count(*) from test.downstream_of_incremental") == (5,)
-    assert not engine_adapter.table_exists("test__dev.incremental_model")
-
-    # then, make a dev version
-    model1 = """
-        MODEL (
-            name test.incremental_model,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "date"
-            ),
-            start '2024-01-01',
-            cron '@daily'
-        );
-
-        select 1 as account_id, date from test.external_table;
-    """
-    with open(models_dir / "model1.sql", "w") as f:
-        f.write(model1)
-    ctx.load()
-
-    ctx.plan(environment="dev", auto_apply=True, no_prompts=True)
-    assert engine_adapter.table_exists("test__dev.incremental_model")
-    assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (5,)
-
-    # drop some source data so that restating the interval essentially clears it, which is easy to verify
-    engine_adapter.execute("delete from test.external_table where date = '2024-01-01'")
-    assert engine_adapter.fetchone("select count(*) from test.external_table") == (4,)
-
-    # now, restate intervals in dev and verify prod is NOT affected
-    ctx.plan(
-        environment="dev",
-        start="2024-01-01",
-        end="2024-01-02",
-        restate_models=["test.incremental_model"],
-        auto_apply=True,
-        no_prompts=True,
-    )
-    assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test.incremental_model where date = '2024-01-01'"
-    ) == (1,)
-    assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (4,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test__dev.incremental_model where date = '2024-01-01'"
-    ) == (0,)
-
-    # prod still should not be affected by a run because the restatement only happened in dev
-    ctx.run()
-    assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test.incremental_model where date = '2024-01-01'"
-    ) == (1,)
-
-    # drop another interval from the source data
-    engine_adapter.execute("delete from test.external_table where date = '2024-01-02'")
-
-    # now, restate intervals in prod and verify that dev IS affected
-    ctx.plan(
-        start="2024-01-01",
-        end="2024-01-03",
-        restate_models=["test.incremental_model"],
-        auto_apply=True,
-        no_prompts=True,
-    )
-    assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (3,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test.incremental_model where date = '2024-01-01'"
-    ) == (0,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test.incremental_model where date = '2024-01-02'"
-    ) == (0,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test.incremental_model where date = '2024-01-03'"
-    ) == (1,)
-
-    # dev not affected yet until `sqlmesh run` is run
-    assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (4,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test__dev.incremental_model where date = '2024-01-01'"
-    ) == (0,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test__dev.incremental_model where date = '2024-01-02'"
-    ) == (1,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test__dev.incremental_model where date = '2024-01-03'"
-    ) == (1,)
-
-    # the restatement plan for prod should have cleared dev intervals too, which means this `sqlmesh run` re-runs 2024-01-01 and 2024-01-02
-    ctx.run(environment="dev")
-    assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (3,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test__dev.incremental_model where date = '2024-01-01'"
-    ) == (0,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test__dev.incremental_model where date = '2024-01-02'"
-    ) == (0,)
-    assert engine_adapter.fetchone(
-        "select count(*) from test__dev.incremental_model where date = '2024-01-03'"
-    ) == (1,)
-
-    # the downstream full model should always reflect whatever the incremental model is showing
-    assert engine_adapter.fetchone("select count(*) from test.downstream_of_incremental") == (3,)
-    assert engine_adapter.fetchone("select count(*) from test__dev.downstream_of_incremental") == (
-        3,
-    )
-
-
-def test_prod_restatement_plan_clears_correct_intervals_in_derived_dev_tables(tmp_path: Path):
-    """
-    Scenario:
-        I have models A[hourly] <- B[daily] <- C in prod
-        I create dev and add 2 new models D and E so that my dev DAG looks like A <- B <- C <- D[daily] <- E
-        In prod, I restate *one hour* of A
-    Outcome:
-        D and E should be restated in dev despite not being a part of prod
-        since B and D are daily, the whole day should be restated even though only 1hr of the upstream model was restated
-    """
-
-    model_a = """
-        MODEL (
-            name test.a,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "ts"
-            ),
-            start '2024-01-01 00:00:00',
-            cron '@hourly'
-        );
-
-        select account_id, ts from test.external_table;
-    """
-
-    def _derived_full_model_def(name: str, upstream: str) -> str:
-        return f"""
-            MODEL (
-                name test.{name},
-                kind FULL
-            );
-
-            select account_id, ts from test.{upstream};
-        """
-
-    def _derived_incremental_model_def(name: str, upstream: str) -> str:
-        return f"""
-            MODEL (
-                name test.{name},
-                kind INCREMENTAL_BY_TIME_RANGE (
-                    time_column ts
-                ),
-                cron '@daily'
-            );
-
-            select account_id, ts from test.{upstream} where ts between @start_ts and @end_ts;
-        """
-
-    model_b = _derived_incremental_model_def("b", upstream="a")
-    model_c = _derived_full_model_def("c", upstream="b")
-
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-
-    for path, defn in {"a.sql": model_a, "b.sql": model_b, "c.sql": model_c}.items():
-        with open(models_dir / path, "w") as f:
-            f.write(defn)
-
-    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
-    ctx = Context(paths=[tmp_path], config=config)
-
-    engine_adapter = ctx.engine_adapter
-    engine_adapter.create_schema("test")
-
-    # source data
-    df = pd.DataFrame(
-        {
-            "account_id": [1001, 1002, 1003, 1004],
-            "ts": [
-                "2024-01-01 00:30:00",
-                "2024-01-01 01:30:00",
-                "2024-01-01 02:30:00",
-                "2024-01-02 00:30:00",
-            ],
-        }
-    )
-    columns_to_types = {
-        "account_id": exp.DataType.build("int"),
-        "ts": exp.DataType.build("timestamp"),
-    }
-    external_table = exp.table_(table="external_table", db="test", quoted=True)
-    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
-    engine_adapter.insert_append(
-        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
-    )
-
-    # plan + apply A, B, C in prod
-    ctx.plan(auto_apply=True, no_prompts=True)
-
-    # add D[daily], E in dev
-    model_d = _derived_incremental_model_def("d", upstream="c")
-    model_e = _derived_full_model_def("e", upstream="d")
-
-    for path, defn in {
-        "d.sql": model_d,
-        "e.sql": model_e,
-    }.items():
-        with open(models_dir / path, "w") as f:
-            f.write(defn)
-
-    # plan + apply dev
-    ctx.load()
-    ctx.plan(environment="dev", auto_apply=True, no_prompts=True)
-
-    def _dates_in_table(table_name: str) -> t.List[str]:
-        return [
-            str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts")
-        ]
-
-    # verify initial state
-    for tbl in ["test.a", "test.b", "test.c", "test__dev.d", "test__dev.e"]:
-        assert engine_adapter.table_exists(tbl)
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-01 01:30:00",
-            "2024-01-01 02:30:00",
-            "2024-01-02 00:30:00",
-        ]
-
-    for tbl in ["test.d", "test.e"]:
-        assert not engine_adapter.table_exists(tbl)
-
-    # restate A in prod
-    engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'")
-    ctx.plan(
-        restate_models=["test.a"],
-        start="2024-01-01 01:00:00",
-        end="2024-01-01 02:00:00",
-        auto_apply=True,
-        no_prompts=True,
-    )
-
-    # verify result
-    for tbl in ["test.a", "test.b", "test.c"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-01 02:30:00",
-            "2024-01-02 00:30:00",
-        ], f"Table {tbl} wasn't cleared"
-
-    # dev shouldn't have been affected yet
-    for tbl in ["test__dev.d", "test__dev.e"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-01 01:30:00",
-            "2024-01-01 02:30:00",
-            "2024-01-02 00:30:00",
-        ], f"Table {tbl} was prematurely cleared"
-
-    # run dev to trigger the processing of the prod restatement
-    ctx.run(environment="dev")
-
-    # data should now be cleared from dev
-    # note that D is a daily model, so clearing an hour interval from A should have triggered the full day in D
-    for tbl in ["test__dev.d", "test__dev.e"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-01 02:30:00",
-            "2024-01-02 00:30:00",
-        ], f"Table {tbl} wasn't cleared"
-
-
-def test_prod_restatement_plan_clears_unaligned_intervals_in_derived_dev_tables(tmp_path: Path):
-    """
-    Scenario:
-        I have a model A[hourly] in prod
-        I create dev and add a model B[daily]
-        In prod, I restate *one hour* of A
-
-    Outcome:
-        The whole day for B should be restated. The restatement plan for prod has no hints about B's cadence because
-        B only exists in dev and there are no other downstream models in prod that would cause the restatement intervals
-        to be widened.
-
-        Therefore, this test checks that SQLMesh does the right thing when an interval is partially cleared
-    """
-
-    model_a = """
-        MODEL (
-            name test.a,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "ts"
-            ),
-            start '2024-01-01 00:00:00',
-            cron '@hourly'
-        );
-
-        select account_id, ts from test.external_table;
-    """
-
-    model_b = """
-        MODEL (
-            name test.b,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ts
-            ),
-            cron '@daily'
-        );
-
-        select account_id, ts from test.a where ts between @start_ts and @end_ts;
-    """
-
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-
-    with open(models_dir / "a.sql", "w") as f:
-        f.write(model_a)
-
-    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
-    ctx = Context(paths=[tmp_path], config=config)
-
-    engine_adapter = ctx.engine_adapter
-    engine_adapter.create_schema("test")
-
-    # source data
-    df = pd.DataFrame(
-        {
-            "account_id": [1001, 1002, 1003, 1004],
-            "ts": [
-                "2024-01-01 00:30:00",
-                "2024-01-01 01:30:00",
-                "2024-01-01 02:30:00",
-                "2024-01-02 00:30:00",
-            ],
-        }
-    )
-    columns_to_types = {
-        "account_id": exp.DataType.build("int"),
-        "ts": exp.DataType.build("timestamp"),
-    }
-    external_table = exp.table_(table="external_table", db="test", quoted=True)
-    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
-    engine_adapter.insert_append(
-        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
-    )
-
-    # plan + apply A[hourly] in prod
-    ctx.plan(auto_apply=True, no_prompts=True)
-
-    # add B[daily] in dev
-    with open(models_dir / "b.sql", "w") as f:
-        f.write(model_b)
-
-    # plan + apply dev
-    ctx.load()
-    ctx.plan(environment="dev", auto_apply=True, no_prompts=True)
-
-    def _dates_in_table(table_name: str) -> t.List[str]:
-        return [
-            str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts")
-        ]
-
-    # verify initial state
-    for tbl in ["test.a", "test__dev.b"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-01 01:30:00",
-            "2024-01-01 02:30:00",
-            "2024-01-02 00:30:00",
-        ]
-
-    # restate A in prod
-    engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'")
-    ctx.plan(
-        restate_models=["test.a"],
-        start="2024-01-01 01:00:00",
-        end="2024-01-01 02:00:00",
-        auto_apply=True,
-        no_prompts=True,
-    )
-
-    # verify result
-    assert _dates_in_table("test.a") == [
-        "2024-01-01 00:30:00",
-        "2024-01-01 02:30:00",
-        "2024-01-02 00:30:00",
-    ]
-
-    # dev shouldn't have been affected yet
-    assert _dates_in_table("test__dev.b") == [
-        "2024-01-01 00:30:00",
-        "2024-01-01 01:30:00",
-        "2024-01-01 02:30:00",
-        "2024-01-02 00:30:00",
-    ]
-
-    # mess with A independently of SQLMesh to prove a whole day gets restated for B instead of just 1hr
-    snapshot_table_name = ctx.table_name("test.a", "dev")
-    engine_adapter.execute(
-        f"delete from {snapshot_table_name} where cast(ts as date) == '2024-01-01'"
-    )
-    engine_adapter.execute(
-        f"insert into {snapshot_table_name} (account_id, ts) values (1007, '2024-01-02 01:30:00')"
-    )
-
-    assert _dates_in_table("test.a") == ["2024-01-02 00:30:00", "2024-01-02 01:30:00"]
-
-    # run dev to trigger the processing of the prod restatement
-    ctx.run(environment="dev")
-
-    # B should now have no data for 2024-01-01
-    # To prove a single day was restated vs the whole model, it also shouldn't have the '2024-01-02 01:30:00' record
-    assert _dates_in_table("test__dev.b") == ["2024-01-02 00:30:00"]
-
-
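The interval widening that the two tests above rely on can be pictured with a minimal sketch (an editor's illustration under stated assumptions, not part of this patch or of SQLMesh's implementation; the helper name is hypothetical): when a restatement clears a sub-interval of a coarser-grained downstream model, the cleared range is snapped outward to that model's interval boundaries.

from datetime import datetime, timedelta

def widen_to_daily(start: datetime, end: datetime) -> "tuple[datetime, datetime]":
    # Snap the cleared range outward to whole-day boundaries so a daily
    # model reprocesses complete intervals rather than a partial day.
    floor = datetime(start.year, start.month, start.day)
    ceil = datetime(end.year, end.month, end.day)
    if ceil < end:
        ceil += timedelta(days=1)
    return floor, ceil

# A one-hour restatement of the hourly model covers the whole day downstream:
assert widen_to_daily(
    datetime(2024, 1, 1, 1), datetime(2024, 1, 1, 2)
) == (datetime(2024, 1, 1), datetime(2024, 1, 2))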
-def test_prod_restatement_plan_causes_dev_intervals_to_be_processed_in_next_dev_plan(
-    tmp_path: Path,
-):
-    """
-    Scenario:
-        I have a model A[hourly] in prod
-        I create dev and add a model B[daily]
-        In prod, I restate *one hour* of A
-        In dev, I run a normal plan instead of a cadence run
-
-    Outcome:
-        The whole day for B should be restated as part of a normal plan
-    """
-
-    model_a = """
-        MODEL (
-            name test.a,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "ts"
-            ),
-            start '2024-01-01 00:00:00',
-            cron '@hourly'
-        );
-
-        select account_id, ts from test.external_table;
-    """
-
-    model_b = """
-        MODEL (
-            name test.b,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ts
-            ),
-            cron '@daily'
-        );
-
-        select account_id, ts from test.a where ts between @start_ts and @end_ts;
-    """
-
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-
-    with open(models_dir / "a.sql", "w") as f:
-        f.write(model_a)
-
-    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
-    ctx = Context(paths=[tmp_path], config=config)
-
-    engine_adapter = ctx.engine_adapter
-    engine_adapter.create_schema("test")
-
-    # source data
-    df = pd.DataFrame(
-        {
-            "account_id": [1001, 1002, 1003, 1004],
-            "ts": [
-                "2024-01-01 00:30:00",
-                "2024-01-01 01:30:00",
-                "2024-01-01 02:30:00",
-                "2024-01-02 00:30:00",
-            ],
-        }
-    )
-    columns_to_types = {
-        "account_id": exp.DataType.build("int"),
-        "ts": exp.DataType.build("timestamp"),
-    }
-    external_table = exp.table_(table="external_table", db="test", quoted=True)
-    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
-    engine_adapter.insert_append(
-        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
-    )
-
-    # plan + apply A[hourly] in prod
-    ctx.plan(auto_apply=True, no_prompts=True)
-
-    # add B[daily] in dev
-    with open(models_dir / "b.sql", "w") as f:
-        f.write(model_b)
-
-    # plan + apply dev
-    ctx.load()
-    ctx.plan(environment="dev", auto_apply=True, no_prompts=True)
-
-    def _dates_in_table(table_name: str) -> t.List[str]:
-        return [
-            str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts")
-        ]
-
-    # verify initial state
-    for tbl in ["test.a", "test__dev.b"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-01 01:30:00",
-            "2024-01-01 02:30:00",
-            "2024-01-02 00:30:00",
-        ]
-
-    # restate A in prod
-    engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'")
-    ctx.plan(
-        restate_models=["test.a"],
-        start="2024-01-01 01:00:00",
-        end="2024-01-01 02:00:00",
-        auto_apply=True,
-        no_prompts=True,
-    )
-
-    # verify result
-    assert _dates_in_table("test.a") == [
-        "2024-01-01 00:30:00",
-        "2024-01-01 02:30:00",
-        "2024-01-02 00:30:00",
-    ]
-
-    # dev shouldn't have been affected yet
-    assert _dates_in_table("test__dev.b") == [
-        "2024-01-01 00:30:00",
-        "2024-01-01 01:30:00",
-        "2024-01-01 02:30:00",
-        "2024-01-02 00:30:00",
-    ]
-
-    # plan dev which should trigger the missing intervals to get repopulated
-    ctx.plan(environment="dev", auto_apply=True, no_prompts=True)
-
-    # dev should have the restated data
-    for tbl in ["test.a", "test__dev.b"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-01 02:30:00",
-            "2024-01-02 00:30:00",
-        ]
-
-
-def test_prod_restatement_plan_causes_dev_intervals_to_be_widened_on_full_restatement_only_model(
-    tmp_path,
-):
-    """
-    Scenario:
-        I have an INCREMENTAL_BY_TIME_RANGE model A[daily] in prod
-        I create dev and add an INCREMENTAL_BY_UNIQUE_KEY model B (which supports full restatement only)
-        In prod, I restate one day of A, which should cause intervals in dev to be cleared (but not processed)
-        In dev, I run a plan
-
-    Outcome:
-        In the dev plan, the entire model for B should be rebuilt because it does not support partial restatement
-    """
-
-    model_a = """
-        MODEL (
-            name test.a,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "ts"
-            ),
-            start '2024-01-01 00:00:00',
-            cron '@daily'
-        );
-
-        select account_id, ts from test.external_table where ts between @start_ts and @end_ts;
-    """
-
-    model_b = """
-        MODEL (
-            name test.b,
-            kind INCREMENTAL_BY_UNIQUE_KEY (
-                unique_key (account_id, ts)
-            ),
-            cron '@daily'
-        );
-
-        select account_id, ts from test.a where ts between @start_ts and @end_ts;
-    """
-
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-
-    with open(models_dir / "a.sql", "w") as f:
-        f.write(model_a)
-
-    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
-    ctx = Context(paths=[tmp_path], config=config)
-
-    engine_adapter = ctx.engine_adapter
-    engine_adapter.create_schema("test")
-
-    # source data
-    df = pd.DataFrame(
-        {
-            "account_id": [1001, 1002, 1003, 1004],
-            "ts": [
-                "2024-01-01 00:30:00",
-                "2024-01-02 01:30:00",
-                "2024-01-03 02:30:00",
-                "2024-01-04 00:30:00",
-            ],
-        }
-    )
-    columns_to_types = {
-        "account_id": exp.DataType.build("int"),
-        "ts": exp.DataType.build("timestamp"),
-    }
-    external_table = exp.table_(table="external_table", db="test", quoted=True)
-    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
-    engine_adapter.insert_append(
-        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
-    )
-
-    # plan + apply A[daily] in prod
-    ctx.plan(auto_apply=True)
-
-    # add B[daily] in dev
-    with open(models_dir / "b.sql", "w") as f:
-        f.write(model_b)
-
-    # plan + apply dev
-    ctx.load()
-    ctx.plan(environment="dev", auto_apply=True)
-
-    def _dates_in_table(table_name: str) -> t.List[str]:
-        return [
-            str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts")
-        ]
-
-    # verify initial state
-    for tbl in ["test.a", "test__dev.b"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-02 01:30:00",
-            "2024-01-03 02:30:00",
-            "2024-01-04 00:30:00",
-        ]
-
-    # restate A in prod
-    engine_adapter.execute("delete from test.external_table where ts = '2024-01-02 01:30:00'")
-    ctx.plan(
-        restate_models=["test.a"],
-        start="2024-01-02 00:00:00",
-        end="2024-01-03 00:00:00",
-        auto_apply=True,
-        no_prompts=True,
-    )
-
-    # verify result
-    assert _dates_in_table("test.a") == [
-        "2024-01-01 00:30:00",
-        "2024-01-03 02:30:00",
-        "2024-01-04 00:30:00",
-    ]
-
-    # dev shouldn't have been affected yet
-    assert _dates_in_table("test__dev.b") == [
-        "2024-01-01 00:30:00",
-        "2024-01-02 01:30:00",
-        "2024-01-03 02:30:00",
-        "2024-01-04 00:30:00",
-    ]
-
-    # plan dev which should trigger the missing intervals to get repopulated
-    ctx.plan(environment="dev", auto_apply=True)
-
-    # dev should have fully refreshed
-    # this is proven by the fact that INCREMENTAL_BY_UNIQUE_KEY can't propagate deletes, so if the
-    # model was not fully rebuilt, the deleted record would still be present
-    for tbl in ["test.a", "test__dev.b"]:
-        assert _dates_in_table(tbl) == [
-            "2024-01-01 00:30:00",
-            "2024-01-03 02:30:00",
-            "2024-01-04 00:30:00",
-        ]
-
-
-def test_prod_restatement_plan_missing_model_in_dev(
-    tmp_path: Path,
-):
-    """
-    Scenario:
-        I have a model B in prod but only model A in dev
-        I restate B in prod
-
-    Outcome:
-        The A model should be ignored and the plan shouldn't fail
-    """
-
-    model_a = """
-        MODEL (
-            name test.a,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "ts"
-            ),
-            start '2024-01-01 00:00:00',
-            cron '@hourly'
-        );
-
-        select account_id, ts from test.external_table;
-    """
-
-    model_b = """
-        MODEL (
-            name test.b,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ts
-            ),
-            cron '@daily'
-        );
-
-        select account_id, ts from test.external_table where ts between @start_ts and @end_ts;
-    """
-
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-
-    with open(models_dir / "a.sql", "w") as f:
-        f.write(model_a)
-
-    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
-    ctx = Context(paths=[tmp_path], config=config)
-
-    engine_adapter = ctx.engine_adapter
-    engine_adapter.create_schema("test")
-
-    # source data
-    df = pd.DataFrame(
-        {
-            "account_id": [1001, 1002, 1003, 1004],
-            "ts": [
-                "2024-01-01 00:30:00",
-                "2024-01-01 01:30:00",
-                "2024-01-01 02:30:00",
-                "2024-01-02 00:30:00",
-            ],
-        }
-    )
-    columns_to_types = {
-        "account_id": exp.DataType.build("int"),
-        "ts": exp.DataType.build("timestamp"),
-    }
-    external_table = exp.table_(table="external_table", db="test", quoted=True)
-    engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types)
-    engine_adapter.insert_append(
-        table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types
-    )
-
-    # plan + apply A[hourly] in dev
-    ctx.plan("dev", auto_apply=True, no_prompts=True)
-
-    # add B[daily] in prod and remove A
-    with open(models_dir / "b.sql", "w") as f:
-        f.write(model_b)
-    Path(models_dir / "a.sql").unlink()
-
-    # plan + apply prod
-    ctx.load()
-    ctx.plan(auto_apply=True, no_prompts=True)
-
-    # restate B in prod
-    ctx.plan(
-        restate_models=["test.b"],
-        start="2024-01-01",
-        end="2024-01-02",
-        auto_apply=True,
-        no_prompts=True,
-    )
-
-
-def test_prod_restatement_plan_includes_related_unpromoted_snapshots(tmp_path: Path):
-    """
-    Scenario:
-        - I have models A <- B in prod
-        - I have models A <- B <- C in dev
-        - Both B and C have gone through a few iterations in dev so multiple snapshot versions exist
-          for them but not all of them are promoted / active
-        - I restate A in prod
-
-    Outcome:
-        - Intervals should be cleared for all of the versions of B and C, regardless
-          of if they are active in any particular environment, in case they ever get made
-          active
-    """
-
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-
-    (models_dir / "a.sql").write_text("""
-        MODEL (
-            name test.a,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "ts"
-            ),
-            start '2024-01-01 00:00:00',
-            cron '@daily'
-        );
-
-        select 1 as a, now() as ts;
-    """)
-
-    (models_dir / "b.sql").write_text("""
-        MODEL (
-            name test.b,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "ts"
-            ),
-            start '2024-01-01 00:00:00',
-            cron '@daily'
-        );
-
-        select a, ts from test.a
-    """)
-
-    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"))
-    ctx = Context(paths=[tmp_path], config=config)
-
-    def _all_snapshots() -> t.Dict[SnapshotId, Snapshot]:
-        all_snapshot_ids = [
-            SnapshotId(name=name, identifier=identifier)
-            for (name, identifier) in ctx.state_sync.state_sync.engine_adapter.fetchall(  # type: ignore
-                "select name, identifier from sqlmesh._snapshots"
-            )
-        ]
-        return ctx.state_sync.get_snapshots(all_snapshot_ids)
-
-    # plan + apply prod
-    ctx.plan(environment="prod", auto_apply=True)
-    assert len(_all_snapshots()) == 2
-
-    # create dev with new version of B
-    (models_dir / "b.sql").write_text("""
-        MODEL (
-            name test.b,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "ts"
-            ),
-            start '2024-01-01 00:00:00',
-            cron '@daily'
-        );
-
-        select a, ts, 'b dev 1' as change from test.a
-    """)
-
-    ctx.load()
-    ctx.plan(environment="dev", auto_apply=True)
-    assert len(_all_snapshots()) == 3
-
-    # update B (new version) and create C
-    (models_dir / "b.sql").write_text("""
-        MODEL (
-            name test.b,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column "ts"
-            ),
-            start '2024-01-01 00:00:00',
-            cron '@daily'
-        );
-
-        select a, ts, 'b dev 2' as change from test.a
-    """)
-
-    (models_dir / "c.sql").write_text("""
-        MODEL (
-            name test.c,
-            kind FULL,
-            cron '@daily'
-        );
-
-        select *, 'c initial' as val from test.b
-    """)
-
-    ctx.load()
-    ctx.plan(environment="dev", auto_apply=True)
-    assert len(_all_snapshots()) == 5
-
-    # update C (new version), create D (unrelated)
-    (models_dir / "c.sql").write_text("""
-        MODEL (
-            name test.c,
-            kind FULL,
-            cron '@daily'
-        );
-
-        select *, 'c updated' as val from test.b
-    """)
-
-    (models_dir / "d.sql").write_text("""
-        MODEL (
-            name test.d,
-            cron '@daily'
-        );
-
-        select 1 as unrelated
-    """)
-
-    ctx.load()
-    ctx.plan(environment="dev", auto_apply=True)
-    all_snapshots_prior_to_restatement = _all_snapshots()
-    assert len(all_snapshots_prior_to_restatement) == 7
-
-    def _snapshot_instances(lst: t.Dict[SnapshotId, Snapshot], name_match: str) -> t.List[Snapshot]:
-        return [s for s_id, s in lst.items() if name_match in s_id.name]
-
-    # verify initial state
-
-    # 1 instance of A (prod)
-    assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"a"')) == 1
-
-    # 3 instances of B (original in prod + 2 updates in dev)
-    assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"b"')) == 3
-
-    # 2 instances of C (initial + update in dev)
-    assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"c"')) == 2
-
-    # 1 instance of D (initial - dev)
-    assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"d"')) == 1
-
-    # restate A in prod
-    ctx.plan(environment="prod", restate_models=['"memory"."test"."a"'], auto_apply=True)
-
-    all_snapshots_after_restatement = _all_snapshots()
-
-    # All versions of B and C in dev should have had intervals cleared
-    # D in dev should not be touched and A + B in prod should also not be touched
-    a = _snapshot_instances(all_snapshots_after_restatement, '"a"')
-    assert len(a) == 1
-
-    b = _snapshot_instances(all_snapshots_after_restatement, '"b"')
-    # the 1 B instance in prod should be populated and 2 in dev (1 active) should be cleared
-    assert len(b) == 3
-    assert len([s for s in b if not s.intervals]) == 2
-
-    c = _snapshot_instances(all_snapshots_after_restatement, '"c"')
-    # the 2 instances of C in dev (1 active) should be cleared
-    assert len(c) == 2
-    assert len([s for s in c if not s.intervals]) == 2
-
-    d = _snapshot_instances(all_snapshots_after_restatement, '"d"')
-    # D should not be touched since it's in no way downstream of A in prod
-    assert len(d) == 1
-    assert d[0].intervals
-
-
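A rough sketch of the invariant the test above asserts (an editor's illustration with hypothetical names, not SQLMesh's implementation): a prod restatement must select every stored version of a downstream model when clearing intervals, whether or not that version is currently promoted into an environment.

import typing as t

def versions_to_clear(
    snapshots_by_name: t.Mapping[str, t.List[dict]], downstream: t.Set[str]
) -> t.List[dict]:
    # Every stored version of each downstream model qualifies, not just
    # the versions currently active in some environment.
    return [s for name in downstream for s in snapshots_by_name.get(name, [])]

store = {'"b"': [{"id": 1}, {"id": 2}, {"id": 3}], '"d"': [{"id": 4}]}
assert len(versions_to_clear(store, {'"b"'})) == 3  # all B versions; D untouched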
context.plan_builder("dev", restate_models=["*"]).build() - assert set(restatement_plan.restatements) == { - context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id, - context.get_snapshot("sushi.top_waiters").snapshot_id, - } - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_plan_snapshot_table_exists_for_promoted_snapshot(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - context.plan("dev", auto_apply=True, no_prompts=True, skip_tests=True) - - # Drop the views and make sure SQLMesh recreates them later - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - context.engine_adapter.drop_view(top_waiters_snapshot.table_name()) - context.engine_adapter.drop_view(top_waiters_snapshot.table_name(False)) - - # Make the environment unfinalized to force recreation of all views in the virtual layer - context.state_sync.state_sync.engine_adapter.execute( - "UPDATE sqlmesh._environments SET finalized_ts = NULL WHERE name = 'dev'" - ) - - context.plan( - "prod", - restate_models=["sushi.top_waiters"], - auto_apply=True, - no_prompts=True, - skip_tests=True, - ) - assert context.engine_adapter.table_exists(top_waiters_snapshot.table_name()) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_plan_against_expired_environment(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - modified_models = {model.fqn, context.get_model("sushi.top_waiters").fqn} - - plan = context.plan_builder("dev").build() - assert plan.has_changes - assert set(plan.context_diff.modified_snapshots) == modified_models - assert plan.missing_intervals - context.apply(plan) - - # Make sure there are no changes when comparing against the existing environment. - plan = context.plan_builder("dev").build() - assert not plan.has_changes - assert not plan.context_diff.modified_snapshots - assert not plan.missing_intervals - - # Invalidate the environment and make sure that the plan detects the changes. - context.invalidate_environment("dev") - plan = context.plan_builder("dev").build() - assert plan.has_changes - assert set(plan.context_diff.modified_snapshots) == modified_models - assert not plan.missing_intervals - context.apply(plan) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_new_forward_only_model_concurrent_versions(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - new_model_expr = d.parse( - """ - MODEL ( - name memory.sushi.new_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only TRUE, - on_destructive_change 'allow', - ), - ); - - SELECT '2023-01-07' AS ds, 1 AS a; - """ - ) - new_model = load_sql_based_model(new_model_expr) - - # Add the first version of the model and apply it to dev_a. 
-    context.upsert_model(new_model)
-    snapshot_a = context.get_snapshot(new_model.name)
-    plan_a = context.plan_builder("dev_a").build()
-    snapshot_a = plan_a.snapshots[snapshot_a.snapshot_id]
-
-    assert snapshot_a.snapshot_id in plan_a.context_diff.new_snapshots
-    assert snapshot_a.snapshot_id in plan_a.context_diff.added
-    assert snapshot_a.change_category == SnapshotChangeCategory.BREAKING
-
-    context.apply(plan_a)
-
-    new_model_alt_expr = d.parse(
-        """
-        MODEL (
-            name memory.sushi.new_model,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ds,
-                forward_only TRUE,
-                on_destructive_change 'allow',
-            ),
-        );
-
-        SELECT '2023-01-07' AS ds, 1 AS b;
-        """
-    )
-    new_model_alt = load_sql_based_model(new_model_alt_expr)
-
-    # Add the second version of the model but don't apply it yet
-    context.upsert_model(new_model_alt)
-    snapshot_b = context.get_snapshot(new_model_alt.name)
-    plan_b = context.plan_builder("dev_b").build()
-    snapshot_b = plan_b.snapshots[snapshot_b.snapshot_id]
-
-    assert snapshot_b.snapshot_id in plan_b.context_diff.new_snapshots
-    assert snapshot_b.snapshot_id in plan_b.context_diff.added
-    assert snapshot_b.change_category == SnapshotChangeCategory.BREAKING
-
-    assert snapshot_b.fingerprint != snapshot_a.fingerprint
-    assert snapshot_b.version == snapshot_a.version
-
-    # Apply the 1st version to prod
-    context.upsert_model(new_model)
-    plan_prod_a = context.plan_builder("prod").build()
-    assert snapshot_a.snapshot_id in plan_prod_a.snapshots
-    assert (
-        plan_prod_a.snapshots[snapshot_a.snapshot_id].change_category
-        == SnapshotChangeCategory.BREAKING
-    )
-    context.apply(plan_prod_a)
-
-    df = context.fetchdf("SELECT * FROM memory.sushi.new_model")
-    assert df.to_dict() == {"ds": {0: "2023-01-07"}, "a": {0: 1}}
-
-    # Modify the 1st version in prod to trigger a forward-only change
-    new_model = add_projection_to_model(t.cast(SqlModel, new_model))
-    context.upsert_model(new_model)
-    context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True)
-
-    # Apply the 2nd version to dev_b.
-    # At this point the snapshot of the 2nd version has already been categorized but not
-    # persisted in the state. This means that when the snapshot of the 1st version was
-    # being unpaused during promotion to prod, the state of the 2nd version snapshot was not updated
-    context.apply(plan_b)
-
-    # Apply the 2nd version to prod
-    context.upsert_model(new_model_alt)
-    plan_prod_b = context.plan_builder("prod").build()
-    assert (
-        plan_prod_b.snapshots[snapshot_b.snapshot_id].change_category
-        == SnapshotChangeCategory.BREAKING
-    )
-    assert not plan_prod_b.requires_backfill
-    context.apply(plan_prod_b)
-
-    df = context.fetchdf("SELECT * FROM memory.sushi.new_model").replace({np.nan: None})
-    assert df.to_dict() == {"ds": {0: "2023-01-07"}, "b": {0: None}}
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_new_forward_only_model_same_dev_environment(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    new_model_expr = d.parse(
-        """
-        MODEL (
-            name memory.sushi.new_model,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ds,
-                forward_only TRUE,
-                on_destructive_change 'allow',
-            ),
-        );
-
-        SELECT '2023-01-07' AS ds, 1 AS a;
-        """
-    )
-    new_model = load_sql_based_model(new_model_expr)
-
-    # Add the first version of the model and apply it to dev.
-    context.upsert_model(new_model)
-    snapshot_a = context.get_snapshot(new_model.name)
-    plan_a = context.plan_builder("dev").build()
-    snapshot_a = plan_a.snapshots[snapshot_a.snapshot_id]
-
-    assert snapshot_a.snapshot_id in plan_a.context_diff.new_snapshots
-    assert snapshot_a.snapshot_id in plan_a.context_diff.added
-    assert snapshot_a.change_category == SnapshotChangeCategory.BREAKING
-
-    context.apply(plan_a)
-
-    df = context.fetchdf("SELECT * FROM memory.sushi__dev.new_model")
-    assert df.to_dict() == {"ds": {0: "2023-01-07"}, "a": {0: 1}}
-
-    new_model_alt_expr = d.parse(
-        """
-        MODEL (
-            name memory.sushi.new_model,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ds,
-                forward_only TRUE,
-                on_destructive_change 'allow',
-            ),
-        );
-
-        SELECT '2023-01-07' AS ds, 1 AS b;
-        """
-    )
-    new_model_alt = load_sql_based_model(new_model_alt_expr)
-
-    # Add the second version of the model and apply it to the same environment.
-    context.upsert_model(new_model_alt)
-    snapshot_b = context.get_snapshot(new_model_alt.name)
-
-    context.invalidate_environment("dev", sync=True)
-    plan_b = context.plan_builder("dev").build()
-    snapshot_b = plan_b.snapshots[snapshot_b.snapshot_id]
-
-    context.apply(plan_b)
-
-    df = context.fetchdf("SELECT * FROM memory.sushi__dev.new_model").replace({np.nan: None})
-    assert df.to_dict() == {"ds": {0: "2023-01-07"}, "b": {0: 1}}
-
-
-@time_machine.travel("2023-01-08 01:00:00 UTC")
-def test_run_auto_restatement(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    context.engine_adapter.execute(
-        "CREATE TABLE _test_auto_restatement_intervals (name STRING, start_ds STRING, end_ds STRING)"
-    )
-
-    @macro()
-    def record_intervals(
-        evaluator, name: exp.Expression, start: exp.Expression, end: exp.Expression, **kwargs: t.Any
-    ) -> None:
-        if evaluator.runtime_stage == "evaluating":
-            evaluator.engine_adapter.insert_append(
-                "_test_auto_restatement_intervals",
-                pd.DataFrame({"name": [name.name], "start_ds": [start.name], "end_ds": [end.name]}),
-            )
-
-    new_model_expr = d.parse(
-        """
-        MODEL (
-            name memory.sushi.new_model,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ds,
-                auto_restatement_cron '0 6 * * 7', -- At 6am every Sunday
-                auto_restatement_intervals 3,
-            ),
-            start '2023-01-01',
-        );
-
-        @record_intervals('new_model', @start_ds, @end_ds);
-
-        SELECT '2023-01-07' AS ds, 1 AS a;
-        """
-    )
-    new_model = load_sql_based_model(new_model_expr)
-    context.upsert_model(new_model)
-
-    new_model_downstream_expr = d.parse(
-        """
-        MODEL (
-            name memory.sushi.new_model_downstream,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ds,
-            ),
-            cron '@hourly',
-        );
-
-        @record_intervals('new_model_downstream', @start_ts, @end_ts);
-
-        SELECT * FROM memory.sushi.new_model;
-        """
-    )
-    new_model_downstream = load_sql_based_model(new_model_downstream_expr)
-    context.upsert_model(new_model_downstream)
-
-    plan = context.plan_builder("prod").build()
-    context.apply(plan)
-
-    with time_machine.travel("2023-01-08 06:01:00 UTC"):
-        assert context.run()
-
-    recorded_intervals_df = context.engine_adapter.fetchdf(
-        "SELECT start_ds, end_ds FROM _test_auto_restatement_intervals WHERE name = 'new_model'"
-    )
-    # The first interval is the first backfill and the second interval should be the 3 auto restated intervals
-    assert recorded_intervals_df.to_dict() == {
-        "start_ds": {0: "2023-01-01", 1: "2023-01-05"},
-        "end_ds": {0: "2023-01-07", 1: "2023-01-07"},
-    }
-    recorded_intervals_downstream_df = context.engine_adapter.fetchdf(
-        "SELECT start_ds, end_ds FROM _test_auto_restatement_intervals WHERE name = 'new_model_downstream'"
-    )
-    # The first interval is the first backfill, the second interval should be the 3 days of restated intervals, and
-    # the third interval should catch up to the current hour
-    assert recorded_intervals_downstream_df.to_dict() == {
-        "start_ds": {
-            0: "2023-01-01 00:00:00",
-            1: "2023-01-05 00:00:00",
-            2: "2023-01-08 01:00:00",
-        },
-        "end_ds": {
-            0: "2023-01-08 00:59:59.999999",
-            1: "2023-01-07 23:59:59.999999",
-            2: "2023-01-08 05:59:59.999999",
-        },
-    }
-
-    snapshot = context.get_snapshot(new_model.name)
-    snapshot = context.state_sync.state_sync.get_snapshots([snapshot.snapshot_id])[
-        snapshot.snapshot_id
-    ]
-    assert snapshot.next_auto_restatement_ts == to_timestamp("2023-01-15 06:00:00")
-    assert not snapshot.pending_restatement_intervals
-
-    snapshot_downstream = context.get_snapshot(new_model_downstream.name)
-    snapshot_downstream = context.state_sync.state_sync.get_snapshots(
-        [snapshot_downstream.snapshot_id]
-    )[snapshot_downstream.snapshot_id]
-    assert not snapshot_downstream.next_auto_restatement_ts
-    assert not snapshot_downstream.pending_restatement_intervals
-
-
-@time_machine.travel("2023-01-08 01:00:00 UTC")
-def test_run_auto_restatement_plan_preview(init_and_plan_context: t.Callable):
-    context, init_plan = init_and_plan_context("examples/sushi")
-    context.apply(init_plan)
-
-    new_model_expr = d.parse(
-        """
-        MODEL (
-            name memory.sushi.new_model,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ds,
-                auto_restatement_cron '0 6 * * 7',
-            ),
-            start '2023-01-01',
-        );
-
-        SELECT '2023-01-07' AS ds, 1 AS a;
-        """
-    )
-    new_model = load_sql_based_model(new_model_expr)
-    context.upsert_model(new_model)
-    snapshot = context.get_snapshot(new_model.name)
-
-    plan_dev = context.plan_builder("dev").build()
-    # Make sure that a limited preview is computed by default
-    assert to_timestamp(plan_dev.start) == to_timestamp("2023-01-07")
-    assert plan_dev.missing_intervals == [
-        SnapshotIntervals(
-            snapshot.snapshot_id,
-            [(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))],
-        )
-    ]
-    assert not plan_dev.deployability_index.is_deployable(snapshot.snapshot_id)
-    context.apply(plan_dev)
-
-    plan_prod = context.plan_builder("prod").build()
-    assert plan_prod.missing_intervals == [
-        SnapshotIntervals(
-            context.get_snapshot(new_model.name).snapshot_id,
-            [
-                (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")),
-                (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")),
-                (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")),
-                (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")),
-                (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")),
-                (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")),
-                (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")),
-            ],
-        )
-    ]
-    context.apply(plan_prod)
-
-
-@time_machine.travel("2023-01-08 01:00:00 UTC")
-def test_run_auto_restatement_failure(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    @macro()
-    def fail_auto_restatement(evaluator, start: exp.Expression, **kwargs: t.Any) -> None:
-        if evaluator.runtime_stage == "evaluating" and start.name != "2023-01-01":
-            raise Exception("Failed")
-
-    new_model_expr = d.parse(
-        """
-        MODEL (
-            name memory.sushi.new_model,
-            kind INCREMENTAL_BY_TIME_RANGE (
-                time_column ds,
-                auto_restatement_cron '0 6 * * 7', -- At 6am every Sunday
-                auto_restatement_intervals 3,
-            ),
-            start '2023-01-01',
-        );
-
-        @fail_auto_restatement(@start_ds);
-
-        SELECT '2023-01-07' AS ds, 1 AS a;
-        """
-    )
-    new_model = load_sql_based_model(new_model_expr)
-    context.upsert_model(new_model)
-
-    plan = context.plan_builder("prod").build()
-    context.apply(plan)
-
-    with time_machine.travel("2023-01-08 06:01:00 UTC"):
-        run_status = context.run()
-        assert run_status.is_failure
-
-    snapshot = context.get_snapshot(new_model.name)
-    snapshot = context.state_sync.state_sync.get_snapshots([snapshot.snapshot_id])[
-        snapshot.snapshot_id
-    ]
-    assert snapshot.next_auto_restatement_ts == to_timestamp("2023-01-15 06:00:00")
-    assert snapshot.pending_restatement_intervals == [
-        (to_timestamp("2023-01-05"), to_timestamp("2023-01-08"))
-    ]
-
-
-def test_plan_twice_with_star_macro_yields_no_diff(tmp_path: Path):
-    init_example_project(tmp_path, engine_type="duckdb")
-
-    star_model_definition = """
-        MODEL (
-            name sqlmesh_example.star_model,
-            kind FULL
-        );
-
-        SELECT @STAR(sqlmesh_example.full_model) FROM sqlmesh_example.full_model
-    """
-
-    star_model_path = tmp_path / "models" / "star_model.sql"
-    star_model_path.write_text(star_model_definition)
-
-    db_path = str(tmp_path / "db.db")
-    config = Config(
-        gateways={"main": GatewayConfig(connection=DuckDBConnectionConfig(database=db_path))},
-        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
-    )
-    context = Context(paths=tmp_path, config=config)
-    context.plan(auto_apply=True, no_prompts=True)
-
-    # Instantiate new context to remove caches etc
-    new_context = Context(paths=tmp_path, config=config)
-
-    star_model = new_context.get_model("sqlmesh_example.star_model")
-    assert (
-        star_model.render_query_or_raise().sql()
-        == 'SELECT CAST("full_model"."item_id" AS INT) AS "item_id", CAST("full_model"."num_orders" AS BIGINT) AS "num_orders" FROM "db"."sqlmesh_example"."full_model" AS "full_model"'
-    )
-
-    new_plan = new_context.plan_builder().build()
-    assert not new_plan.has_changes
-    assert not new_plan.new_snapshots
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_create_environment_no_changes_with_selector(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    with pytest.raises(NoChangesPlanError):
-        context.plan_builder("dev").build()
-
-    plan = context.plan_builder("dev", select_models=["*top_waiters"]).build()
-    assert not plan.missing_intervals
-    context.apply(plan)
-
-    schema_objects = context.engine_adapter.get_data_objects("sushi__dev")
-    assert {o.name for o in schema_objects} == {"top_waiters"}
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_empty_backfill(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    plan = context.plan_builder("prod", skip_tests=True, empty_backfill=True).build()
-    assert plan.missing_intervals
-    assert plan.empty_backfill
-    assert not plan.requires_backfill
-
-    context.apply(plan)
-
-    for model in context.models.values():
-        if model.is_seed or model.kind.is_symbolic:
-            continue
-        row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM {model.name}")[0]
-        assert row_num == 0
-
-    plan = context.plan_builder("prod", skip_tests=True).build()
-    assert not plan.requires_backfill
-    assert not plan.has_changes
-    assert not plan.missing_intervals
-
-    snapshots = plan.snapshots
-    for snapshot in snapshots.values():
-        if not snapshot.intervals:
-            continue
-        assert snapshot.intervals[-1][1] <= to_timestamp("2023-01-08")
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_empty_backfill_new_model(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    new_model = load_sql_based_model(
-        d.parse(
-            """
-            MODEL (
-                name memory.sushi.new_model,
-                kind FULL,
-                cron '0 8 * * *',
-                start '2023-01-01',
-            );
-
-            SELECT 1 AS one;
-            """
-        )
-    )
-    new_model_name = context.upsert_model(new_model).fqn
-
-    with time_machine.travel("2023-01-09 00:00:00 UTC"):
-        plan = context.plan_builder("dev", skip_tests=True, empty_backfill=True).build()
-        assert plan.end == to_datetime("2023-01-09")
-        assert plan.missing_intervals
-        assert plan.empty_backfill
-        assert not plan.requires_backfill
-
-        context.apply(plan)
-
-        for model in context.models.values():
-            if model.is_seed or model.kind.is_symbolic:
-                continue
-            row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi__dev.new_model")[
-                0
-            ]
-            assert row_num == 0
-
-        plan = context.plan_builder("prod", skip_tests=True).build()
-        assert not plan.requires_backfill
-        assert not plan.missing_intervals
-
-        snapshots = plan.snapshots
-        for snapshot in snapshots.values():
-            if not snapshot.intervals:
-                continue
-            elif snapshot.name == new_model_name:
-                assert snapshot.intervals[-1][1] == to_timestamp("2023-01-09")
-            else:
-                assert snapshot.intervals[-1][1] <= to_timestamp("2023-01-08")
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-@pytest.mark.parametrize("forward_only", [False, True])
-def test_plan_repairs_unrenderable_snapshot_state(
-    init_and_plan_context: t.Callable, forward_only: bool
-):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    target_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day")
-    assert target_snapshot
-
-    # Manually corrupt the snapshot's query
-    raw_snapshot = context.state_sync.state_sync.engine_adapter.fetchone(
-        f"SELECT snapshot FROM sqlmesh._snapshots WHERE name = '{target_snapshot.name}' AND identifier = '{target_snapshot.identifier}'"
-    )[0]  # type: ignore
-    parsed_snapshot = json.loads(raw_snapshot)
-    parsed_snapshot["node"]["query"] = "SELECT @missing_macro()"
-    context.state_sync.state_sync.engine_adapter.update_table(
-        "sqlmesh._snapshots",
-        {"snapshot": json.dumps(parsed_snapshot)},
-        f"name = '{target_snapshot.name}' AND identifier = '{target_snapshot.identifier}'",
-    )
-
-    context.clear_caches()
-    target_snapshot_in_state = context.state_sync.get_snapshots([target_snapshot.snapshot_id])[
-        target_snapshot.snapshot_id
-    ]
-
-    with pytest.raises(Exception):
-        target_snapshot_in_state.model.render_query_or_raise()
-
-    # Repair the snapshot by creating a new version of it
-    context.upsert_model(target_snapshot.model.name, stamp="repair")
-    target_snapshot = context.get_snapshot(target_snapshot.name)
-
-    plan_builder = context.plan_builder("prod", forward_only=forward_only)
-    plan = plan_builder.build()
-    if not forward_only:
-        assert target_snapshot.snapshot_id in {i.snapshot_id for i in plan.missing_intervals}
-        assert plan.directly_modified == {target_snapshot.snapshot_id}
-        plan_builder.set_choice(target_snapshot, SnapshotChangeCategory.NON_BREAKING)
-        plan = plan_builder.build()
-
-    context.apply(plan)
-
-    context.clear_caches()
-    assert context.get_snapshot(target_snapshot.name).model.render_query_or_raise()
-    target_snapshot_in_state = context.state_sync.get_snapshots([target_snapshot.snapshot_id])[
-        target_snapshot.snapshot_id
-    ]
-    assert target_snapshot_in_state.model.render_query_or_raise()
-
-
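The repair above works because `stamp` is an arbitrary string that participates in the model's fingerprint, so changing it forces a new snapshot version. A minimal sketch of the idea (an editor's illustration with a hypothetical hash helper, not SQLMesh's actual fingerprinting):

import hashlib
import json

def fingerprint(model: dict) -> str:
    # Any change to the serialized definition, including `stamp`,
    # yields a different fingerprint and hence a new version.
    return hashlib.sha256(json.dumps(model, sort_keys=True).encode()).hexdigest()

old = {"name": "sushi.waiter_revenue_by_day", "query": "SELECT 1"}
assert fingerprint(old) != fingerprint({**old, "stamp": "repair"})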
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_no_backfill_for_model_downstream_of_metadata_change(init_and_plan_context: t.Callable):
-    context, _ = init_and_plan_context("examples/sushi")
-
-    # Make sushi.waiter_revenue_by_day a forward-only model.
-    forward_only_model = context.get_model("sushi.waiter_revenue_by_day")
-    updated_model_kind = forward_only_model.kind.copy(update={"forward_only": True})
-    forward_only_model = forward_only_model.copy(update={"kind": updated_model_kind})
-    context.upsert_model(forward_only_model)
-
-    context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True)
-
-    # Make a metadata change upstream of the forward-only model.
-    context.upsert_model("sushi.orders", owner="new_owner")
-
-    plan = context.plan_builder("test_dev").build()
-    assert plan.has_changes
-    assert not plan.directly_modified
-    assert not plan.indirectly_modified
-    assert not plan.missing_intervals
-    assert all(
-        snapshot.change_category == SnapshotChangeCategory.METADATA
-        for snapshot in plan.new_snapshots
-    )
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_evaluate_uncategorized_snapshot(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    # Add a new projection
-    model = context.get_model("sushi.waiter_revenue_by_day")
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)))
-
-    # Downstream model references the new projection
-    downstream_model = context.get_model("sushi.top_waiters")
-    context.upsert_model(add_projection_to_model(t.cast(SqlModel, downstream_model), literal=False))
-
-    df = context.evaluate(
-        "sushi.top_waiters", start="2023-01-05", end="2023-01-06", execution_time=now()
-    )
-    assert set(df["one"].tolist()) == {1}
-
-
-@time_machine.travel("2023-01-08 15:00:00 UTC")
-def test_table_name(init_and_plan_context: t.Callable):
-    context, plan = init_and_plan_context("examples/sushi")
-    context.apply(plan)
-
-    snapshot = context.get_snapshot("sushi.waiter_revenue_by_day")
-    assert snapshot
-    assert (
-        context.table_name("sushi.waiter_revenue_by_day", "prod")
-        == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{snapshot.version}"
-    )
-
-    with pytest.raises(SQLMeshError, match="Environment 'dev' was not found."):
-        context.table_name("sushi.waiter_revenue_by_day", "dev")
-
-    with pytest.raises(
-        SQLMeshError, match="Model 'sushi.missing' was not found in environment 'prod'."
- ): - context.table_name("sushi.missing", "prod") - - # Add a new projection - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - context.plan("dev_a", auto_apply=True, no_prompts=True, skip_tests=True) - - new_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") - assert new_snapshot.version != snapshot.version - - assert ( - context.table_name("sushi.waiter_revenue_by_day", "dev_a") - == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{new_snapshot.version}" - ) - - # Make a forward-only change - context.upsert_model(model, stamp="forward_only") - - context.plan("dev_b", auto_apply=True, no_prompts=True, skip_tests=True, forward_only=True) - - forward_only_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") - assert forward_only_snapshot.version == snapshot.version - assert forward_only_snapshot.dev_version != snapshot.version - - assert ( - context.table_name("sushi.waiter_revenue_by_day", "dev_b") - == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{forward_only_snapshot.dev_version}__dev" - ) - - assert ( - context.table_name("sushi.waiter_revenue_by_day", "dev_b", prod=True) - == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{snapshot.version}" - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_full_model_change_with_plan_start_not_matching_model_start( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model = context.get_model("sushi.top_waiters") - context.upsert_model(model, kind=model_kind_type_from_name("FULL")()) # type: ignore - - # Apply the change with --skip-backfill first and no plan start - context.plan("dev", skip_tests=True, skip_backfill=True, no_prompts=True, auto_apply=True) - - # Apply the plan again but this time don't skip backfill and set start - # to be later than the model start - context.plan("dev", skip_tests=True, no_prompts=True, auto_apply=True, start="1 day ago") - - # Check that the number of rows is not 0 - row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi__dev.top_waiters")[0] - assert row_num > 0 - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_indirect_non_breaking_view_is_updated_with_new_table_references( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Add a new projection to the base model - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - - # Run the janitor to delete the old snapshot record - context.run_janitor(ignore_ttl=True) - - # Check the downstream view and make sure it's still queryable - assert context.get_model("sushi.top_waiters").kind.is_view - row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi.top_waiters")[0] - assert row_num > 0 - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_plan_explain(init_and_plan_context: t.Callable): - old_console = get_console() - set_console(TerminalConsole()) - - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - waiter_revenue_by_day_model = context.get_model("sushi.waiter_revenue_by_day") - waiter_revenue_by_day_model = add_projection_to_model( - t.cast(SqlModel, waiter_revenue_by_day_model) - ) - context.upsert_model(waiter_revenue_by_day_model) - - 
waiter_revenue_by_day_snapshot = context.get_snapshot(waiter_revenue_by_day_model.name) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters") - - common_kwargs = dict(skip_tests=True, no_prompts=True, explain=True) - - # For now just making sure the plan doesn't error - context.plan("dev", **common_kwargs) - context.plan("dev", **common_kwargs, skip_backfill=True) - context.plan("dev", **common_kwargs, empty_backfill=True) - context.plan("dev", **common_kwargs, forward_only=True, enable_preview=True) - context.plan("prod", **common_kwargs) - context.plan("prod", **common_kwargs, forward_only=True) - context.plan("prod", **common_kwargs, restate_models=[waiter_revenue_by_day_model.name]) - - set_console(old_console) - - # Make sure that no changes were actually applied - for target_env in ("dev", "prod"): - plan = context.plan_builder(target_env, skip_tests=True).build() - assert plan.has_changes - assert plan.missing_intervals - assert plan.directly_modified == {waiter_revenue_by_day_snapshot.snapshot_id} - assert len(plan.new_snapshots) == 2 - assert {s.snapshot_id for s in plan.new_snapshots} == { - waiter_revenue_by_day_snapshot.snapshot_id, - top_waiters_snapshot.snapshot_id, - } - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_dbt_requirements(sushi_dbt_context: Context): - assert set(sushi_dbt_context.requirements) == {"dbt-core", "dbt-duckdb"} - assert sushi_dbt_context.requirements["dbt-core"].startswith("1.") - assert sushi_dbt_context.requirements["dbt-duckdb"].startswith("1.") - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_dbt_dialect_with_normalization_strategy(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context( - "tests/fixtures/dbt/sushi_test", config="test_config_with_normalization_strategy" - ) - assert context.default_dialect == "duckdb,normalization_strategy=LOWERCASE" - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_dbt_before_all_with_var_ref_source(init_and_plan_context: t.Callable): - _, plan = init_and_plan_context( - "tests/fixtures/dbt/sushi_test", config="test_config_with_normalization_strategy" - ) - environment_statements = plan.to_evaluatable().environment_statements - assert environment_statements - rendered_statements = [e.render_before_all(dialect="duckdb") for e in environment_statements] - assert rendered_statements[0] == [ - "CREATE TABLE IF NOT EXISTS analytic_stats (physical_table TEXT, evaluation_time TEXT)", - "CREATE TABLE IF NOT EXISTS to_be_executed_last (col TEXT)", - "SELECT 1 AS var, 'items' AS src, 'waiters' AS ref", - ] - - -@pytest.mark.parametrize( - "context_fixture", - ["sushi_context", "sushi_dbt_context", "sushi_test_dbt_context", "sushi_no_default_catalog"], -) -def test_model_add(context_fixture: Context, request): - initial_add(request.getfixturevalue(context_fixture), "dev") - - -def test_model_removed(sushi_context: Context): - environment = "dev" - initial_add(sushi_context, environment) - - top_waiters_snapshot_id = sushi_context.get_snapshot( - "sushi.top_waiters", raise_if_missing=True - ).snapshot_id - - sushi_context._models.pop('"memory"."sushi"."top_waiters"') - - def _validate_plan(context, plan): - validate_plan_changes(plan, removed=[top_waiters_snapshot_id]) - assert not plan.missing_intervals - - def _validate_apply(context): - assert not sushi_context.get_snapshot("sushi.top_waiters", raise_if_missing=False) - assert sushi_context.state_reader.get_snapshots([top_waiters_snapshot_id]) - env = 
sushi_context.state_reader.get_environment(environment) - assert env - assert all(snapshot.name != '"memory"."sushi"."top_waiters"' for snapshot in env.snapshots) - - apply_to_environment( - sushi_context, - environment, - SnapshotChangeCategory.BREAKING, - plan_validators=[_validate_plan], - apply_validators=[_validate_apply], - ) - - -def test_non_breaking_change(sushi_context: Context): - environment = "dev" - initial_add(sushi_context, environment) - validate_query_change(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING, False) - - -def test_breaking_change(sushi_context: Context): - environment = "dev" - initial_add(sushi_context, environment) - validate_query_change(sushi_context, environment, SnapshotChangeCategory.BREAKING, False) - - -def test_logical_change(sushi_context: Context): - environment = "dev" - initial_add(sushi_context, environment) - previous_sushi_items_version = sushi_context.get_snapshot( - "sushi.items", raise_if_missing=True - ).version - - change_data_type( - sushi_context, - "sushi.items", - DataType.Type.DOUBLE, - DataType.Type.FLOAT, - ) - apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) - - change_data_type( - sushi_context, - "sushi.items", - DataType.Type.FLOAT, - DataType.Type.DOUBLE, - ) - apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) - - assert ( - sushi_context.get_snapshot("sushi.items", raise_if_missing=True).version - == previous_sushi_items_version - ) - - -def validate_query_change( - context: Context, - environment: str, - change_category: SnapshotChangeCategory, - logical: bool, -): - versions = snapshots_to_versions(context.snapshots.values()) - - change_data_type( - context, - "sushi.items", - DataType.Type.DOUBLE, - DataType.Type.FLOAT, - ) - - directly_modified = ['"memory"."sushi"."items"'] - indirectly_modified = [ - '"memory"."sushi"."order_items"', - '"memory"."sushi"."waiter_revenue_by_day"', - '"memory"."sushi"."customer_revenue_by_day"', - '"memory"."sushi"."customer_revenue_lifetime"', - '"memory"."sushi"."top_waiters"', - "assert_item_price_above_zero", - ] - not_modified = [ - snapshot.name - for snapshot in context.snapshots.values() - if snapshot.name not in directly_modified and snapshot.name not in indirectly_modified - ] - - if change_category == SnapshotChangeCategory.BREAKING and not logical: - models_same = not_modified - models_different = directly_modified + indirectly_modified - elif change_category == SnapshotChangeCategory.FORWARD_ONLY: - models_same = not_modified + directly_modified + indirectly_modified - models_different = [] - else: - models_same = not_modified + indirectly_modified - models_different = directly_modified - - def _validate_plan(context, plan): - validate_plan_changes(plan, modified=directly_modified + indirectly_modified) - assert bool(plan.missing_intervals) != logical - - def _validate_apply(context): - current_versions = snapshots_to_versions(context.snapshots.values()) - validate_versions_same(models_same, versions, current_versions) - validate_versions_different(models_different, versions, current_versions) - - apply_to_environment( - context, - environment, - change_category, - plan_validators=[_validate_plan], - apply_validators=[_validate_apply], - ) - - -@pytest.mark.parametrize( - "from_, to", - [ - (ModelKindName.INCREMENTAL_BY_TIME_RANGE, ModelKindName.FULL), - (ModelKindName.FULL, ModelKindName.INCREMENTAL_BY_TIME_RANGE), - ], -) -def test_model_kind_change(from_: ModelKindName, to: 
ModelKindName, sushi_context: Context): - environment = f"test_model_kind_change__{from_.value.lower()}__{to.value.lower()}" - incremental_snapshot = sushi_context.get_snapshot("sushi.items", raise_if_missing=True).copy() - - if from_ != ModelKindName.INCREMENTAL_BY_TIME_RANGE: - change_model_kind(sushi_context, from_) - apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) - - if to == ModelKindName.INCREMENTAL_BY_TIME_RANGE: - sushi_context.upsert_model(incremental_snapshot.model) - else: - change_model_kind(sushi_context, to) - - logical = to in (ModelKindName.INCREMENTAL_BY_TIME_RANGE, ModelKindName.EMBEDDED) - validate_model_kind_change(to, sushi_context, environment, logical=logical) - - -def change_model_kind(context: Context, kind: ModelKindName): - if kind in (ModelKindName.VIEW, ModelKindName.EMBEDDED, ModelKindName.FULL): - context.upsert_model( - "sushi.items", - partitioned_by=[], - ) - context.upsert_model("sushi.items", kind=model_kind_type_from_name(kind)()) # type: ignore - - -def validate_model_kind_change( - kind_name: ModelKindName, - context: Context, - environment: str, - *, - logical: bool, -): - directly_modified = ['"memory"."sushi"."items"'] - indirectly_modified = [ - '"memory"."sushi"."order_items"', - '"memory"."sushi"."waiter_revenue_by_day"', - '"memory"."sushi"."customer_revenue_by_day"', - '"memory"."sushi"."customer_revenue_lifetime"', - '"memory"."sushi"."top_waiters"', - "assert_item_price_above_zero", - ] - if kind_name == ModelKindName.INCREMENTAL_BY_TIME_RANGE: - kind: ModelKind = IncrementalByTimeRangeKind(time_column=TimeColumn(column="event_date")) - elif kind_name == ModelKindName.INCREMENTAL_BY_UNIQUE_KEY: - kind = IncrementalByUniqueKeyKind(unique_key="id") - else: - kind = model_kind_type_from_name(kind_name)() # type: ignore - - def _validate_plan(context, plan): - validate_plan_changes(plan, modified=directly_modified + indirectly_modified) - assert ( - next( - snapshot - for snapshot in plan.snapshots.values() - if snapshot.name == '"memory"."sushi"."items"' - ).model.kind.name - == kind.name - ) - assert bool(plan.missing_intervals) != logical - - apply_to_environment( - context, - environment, - SnapshotChangeCategory.NON_BREAKING, - plan_validators=[_validate_plan], - ) - - -def test_environment_isolation(sushi_context: Context): - prod_snapshots = sushi_context.snapshots.values() - - change_data_type( - sushi_context, - "sushi.items", - DataType.Type.DOUBLE, - DataType.Type.FLOAT, - ) - directly_modified = ['"memory"."sushi"."items"'] - indirectly_modified = [ - '"memory"."sushi"."order_items"', - '"memory"."sushi"."waiter_revenue_by_day"', - '"memory"."sushi"."customer_revenue_by_day"', - '"memory"."sushi"."customer_revenue_lifetime"', - '"memory"."sushi"."top_waiters"', - "assert_item_price_above_zero", - ] - - apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) - - # Verify prod unchanged - validate_apply_basics(sushi_context, "prod", prod_snapshots) - - def _validate_plan(context, plan): - validate_plan_changes(plan, modified=directly_modified + indirectly_modified) - assert not plan.missing_intervals - - apply_to_environment( - sushi_context, - "prod", - SnapshotChangeCategory.BREAKING, - plan_validators=[_validate_plan], - ) - - -def test_environment_promotion(sushi_context: Context): - initial_add(sushi_context, "dev") - - # Simulate prod "ahead" - change_data_type(sushi_context, "sushi.items", DataType.Type.DOUBLE, DataType.Type.FLOAT) - apply_to_environment(sushi_context, 
"prod", SnapshotChangeCategory.BREAKING) - - # Simulate rebase - apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) - - # Make changes in dev - change_data_type(sushi_context, "sushi.items", DataType.Type.FLOAT, DataType.Type.DECIMAL) - apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.NON_BREAKING) - - change_data_type(sushi_context, "sushi.top_waiters", DataType.Type.DOUBLE, DataType.Type.INT) - apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) - - change_data_type( - sushi_context, - "sushi.customer_revenue_by_day", - DataType.Type.DOUBLE, - DataType.Type.FLOAT, - ) - apply_to_environment( - sushi_context, - "dev", - SnapshotChangeCategory.FORWARD_ONLY, - allow_destructive_models=['"memory"."sushi"."customer_revenue_by_day"'], - ) - - # Promote to prod - def _validate_plan(context, plan): - sushi_items_snapshot = context.get_snapshot("sushi.items", raise_if_missing=True) - sushi_top_waiters_snapshot = context.get_snapshot( - "sushi.top_waiters", raise_if_missing=True - ) - sushi_customer_revenue_by_day_snapshot = context.get_snapshot( - "sushi.customer_revenue_by_day", raise_if_missing=True - ) - - assert ( - plan.context_diff.modified_snapshots[sushi_items_snapshot.name][0].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.modified_snapshots[sushi_top_waiters_snapshot.name][0].change_category - == SnapshotChangeCategory.BREAKING - ) - assert ( - plan.context_diff.modified_snapshots[sushi_customer_revenue_by_day_snapshot.name][ - 0 - ].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert plan.context_diff.snapshots[ - sushi_customer_revenue_by_day_snapshot.snapshot_id - ].is_forward_only - - apply_to_environment( - sushi_context, - "prod", - SnapshotChangeCategory.NON_BREAKING, - plan_validators=[_validate_plan], - allow_destructive_models=['"memory"."sushi"."customer_revenue_by_day"'], - ) - - -def test_no_override(sushi_context: Context) -> None: - change_data_type( - sushi_context, - "sushi.items", - DataType.Type.INT, - DataType.Type.BIGINT, - ) - - change_data_type( - sushi_context, - "sushi.order_items", - DataType.Type.INT, - DataType.Type.BIGINT, - ) - - plan_builder = sushi_context.plan_builder("prod") - plan = plan_builder.build() - - sushi_items_snapshot = sushi_context.get_snapshot("sushi.items", raise_if_missing=True) - sushi_order_items_snapshot = sushi_context.get_snapshot( - "sushi.order_items", raise_if_missing=True - ) - sushi_water_revenue_by_day_snapshot = sushi_context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ) - - items = plan.context_diff.snapshots[sushi_items_snapshot.snapshot_id] - order_items = plan.context_diff.snapshots[sushi_order_items_snapshot.snapshot_id] - waiter_revenue = plan.context_diff.snapshots[sushi_water_revenue_by_day_snapshot.snapshot_id] - - plan_builder.set_choice(items, SnapshotChangeCategory.BREAKING).set_choice( - order_items, SnapshotChangeCategory.NON_BREAKING - ) - plan_builder.build() - assert items.is_new_version - assert waiter_revenue.is_new_version - plan_builder.set_choice(items, SnapshotChangeCategory.NON_BREAKING) - plan_builder.build() - assert not waiter_revenue.is_new_version - - -@pytest.mark.parametrize( - "change_categories, expected", - [ - ([SnapshotChangeCategory.NON_BREAKING], SnapshotChangeCategory.BREAKING), - ([SnapshotChangeCategory.BREAKING], SnapshotChangeCategory.BREAKING), - ( - [SnapshotChangeCategory.NON_BREAKING, SnapshotChangeCategory.NON_BREAKING], - 
SnapshotChangeCategory.BREAKING, - ), - ( - [SnapshotChangeCategory.NON_BREAKING, SnapshotChangeCategory.BREAKING], - SnapshotChangeCategory.BREAKING, - ), - ( - [SnapshotChangeCategory.BREAKING, SnapshotChangeCategory.NON_BREAKING], - SnapshotChangeCategory.BREAKING, - ), - ( - [SnapshotChangeCategory.BREAKING, SnapshotChangeCategory.BREAKING], - SnapshotChangeCategory.BREAKING, - ), - ], -) -def test_revert( - sushi_context: Context, - change_categories: t.List[SnapshotChangeCategory], - expected: SnapshotChangeCategory, -): - environment = "prod" - original_snapshot_id = sushi_context.get_snapshot("sushi.items", raise_if_missing=True) - - types = (DataType.Type.DOUBLE, DataType.Type.FLOAT, DataType.Type.DECIMAL) - assert len(change_categories) < len(types) - - for i, category in enumerate(change_categories): - change_data_type(sushi_context, "sushi.items", *types[i : i + 2]) - apply_to_environment(sushi_context, environment, category) - assert ( - sushi_context.get_snapshot("sushi.items", raise_if_missing=True) != original_snapshot_id - ) - - change_data_type(sushi_context, "sushi.items", types[len(change_categories)], types[0]) - - def _validate_plan(_, plan): - snapshot = next(s for s in plan.snapshots.values() if s.name == '"memory"."sushi"."items"') - assert snapshot.change_category == expected - assert not plan.missing_intervals - - apply_to_environment( - sushi_context, - environment, - change_categories[-1], - plan_validators=[_validate_plan], - ) - assert sushi_context.get_snapshot("sushi.items", raise_if_missing=True) == original_snapshot_id - - -def test_revert_after_downstream_change(sushi_context: Context): - environment = "prod" - change_data_type(sushi_context, "sushi.items", DataType.Type.DOUBLE, DataType.Type.FLOAT) - apply_to_environment(sushi_context, environment, SnapshotChangeCategory.BREAKING) - - change_data_type( - sushi_context, - "sushi.waiter_revenue_by_day", - DataType.Type.DOUBLE, - DataType.Type.FLOAT, - ) - apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) - - change_data_type(sushi_context, "sushi.items", DataType.Type.FLOAT, DataType.Type.DOUBLE) - - def _validate_plan(_, plan): - snapshot = next(s for s in plan.snapshots.values() if s.name == '"memory"."sushi"."items"') - assert snapshot.change_category == SnapshotChangeCategory.BREAKING - assert plan.missing_intervals - - apply_to_environment( - sushi_context, - environment, - SnapshotChangeCategory.BREAKING, - plan_validators=[_validate_plan], - ) - - -def test_auto_categorization(sushi_context: Context): - environment = "dev" - for config in sushi_context.configs.values(): - config.plan.auto_categorize_changes.sql = AutoCategorizationMode.FULL - initial_add(sushi_context, environment) - - version = sushi_context.get_snapshot( - "sushi.waiter_as_customer_by_day", raise_if_missing=True - ).version - fingerprint = sushi_context.get_snapshot( - "sushi.waiter_as_customer_by_day", raise_if_missing=True - ).fingerprint - - model = t.cast(SqlModel, sushi_context.get_model("sushi.customers", raise_if_missing=True)) - sushi_context.upsert_model( - "sushi.customers", - query_=ParsableSql(sql=model.query.select("'foo' AS foo").sql(dialect=model.dialect)), # type: ignore - ) - apply_to_environment(sushi_context, environment) - - assert ( - sushi_context.get_snapshot( - "sushi.waiter_as_customer_by_day", raise_if_missing=True - ).change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert ( - sushi_context.get_snapshot( - "sushi.waiter_as_customer_by_day", 
raise_if_missing=True - ).fingerprint - != fingerprint - ) - assert ( - sushi_context.get_snapshot("sushi.waiter_as_customer_by_day", raise_if_missing=True).version - == version - ) - - -@use_terminal_console -def test_multi(mocker): - context = Context(paths=["examples/multi/repo_1", "examples/multi/repo_2"], gateway="memory") - - with patch.object(get_console(), "log_warning") as mock_logger: - context.plan_builder(environment="dev") - warnings = mock_logger.call_args[0][0] - repo1_path, repo2_path = context.configs.keys() - assert f"Linter warnings for {repo1_path}" in warnings - assert f"Linter warnings for {repo2_path}" not in warnings - - assert ( - context.render("bronze.a").sql() - == '''SELECT 1 AS "col_a", 'b' AS "col_b", 1 AS "one", 'repo_1' AS "dup"''' - ) - assert ( - context.render("silver.d").sql() - == '''SELECT "c"."col_a" AS "col_a", 2 AS "two", 'repo_2' AS "dup" FROM "memory"."silver"."c" AS "c"''' - ) - context._new_state_sync().reset(default_catalog=context.default_catalog) - plan = context.plan_builder().build() - assert len(plan.new_snapshots) == 5 - context.apply(plan) - - # Ensure before_all, after_all statements for multiple repos have executed - environment_statements = context.state_reader.get_environment_statements(c.PROD) - assert len(environment_statements) == 2 - assert context.fetchdf("select * from before_1").to_dict()["1"][0] == 1 - assert context.fetchdf("select * from before_2").to_dict()["2"][0] == 2 - assert context.fetchdf("select * from after_1").to_dict()["repo_1"][0] == "repo_1" - assert context.fetchdf("select * from after_2").to_dict()["repo_2"][0] == "repo_2" - - old_context = context - context = Context( - paths=["examples/multi/repo_1"], - state_sync=old_context.state_sync, - gateway="memory", - ) - context._engine_adapter = old_context.engine_adapter - del context.engine_adapters - - model = context.get_model("bronze.a") - assert model.project == "repo_1" - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql(sql=model.query.select("'c' AS c").sql(dialect=model.dialect)) - } - ) - ) - plan = context.plan_builder().build() - - assert set(snapshot.name for snapshot in plan.directly_modified) == { - '"memory"."bronze"."a"', - '"memory"."bronze"."b"', - '"memory"."silver"."e"', - } - assert sorted([x.name for x in list(plan.indirectly_modified.values())[0]]) == [ - '"memory"."silver"."c"', - '"memory"."silver"."d"', - ] - assert len(plan.missing_intervals) == 3 - context.apply(plan) - validate_apply_basics(context, c.PROD, plan.snapshots.values()) - - # Ensure that before_all and after_all statements of both repos are there despite planning with repo_1 - environment_statements = context.state_reader.get_environment_statements(c.PROD) - assert len(environment_statements) == 2 - - # Ensure that environment statements have the project field set correctly - sorted_env_statements = sorted(environment_statements, key=lambda es: es.project) - assert sorted_env_statements[0].project == "repo_1" - assert sorted_env_statements[1].project == "repo_2" - - # Assert before_all and after_all for each project - assert sorted_env_statements[0].before_all == [ - "CREATE TABLE IF NOT EXISTS before_1 AS select @one()" - ] - assert sorted_env_statements[0].after_all == [ - "CREATE TABLE IF NOT EXISTS after_1 AS select @dup()" - ] - assert sorted_env_statements[1].before_all == [ - "CREATE TABLE IF NOT EXISTS before_2 AS select @two()" - ] - assert sorted_env_statements[1].after_all == [ - "CREATE TABLE IF NOT EXISTS after_2 AS select @dup()" - ] - 
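The multi-repo behavior exercised above hinges on before_all/after_all statements being stored in state per project. The following is a minimal sketch of that mechanism (not part of this patch), assuming a single DuckDB-backed project; the directory and marker table names are illustrative:

```python
# Minimal sketch: attaching before_all/after_all statements via Config.
# "example_project" and the marker table names are assumptions for illustration.
from pathlib import Path

from sqlmesh.core.config import Config, ModelDefaultsConfig
from sqlmesh.core.context import Context

project_dir = Path("example_project")  # hypothetical project directory
models_dir = project_dir / "models"
models_dir.mkdir(parents=True, exist_ok=True)
(models_dir / "a.sql").write_text(
    "MODEL (name test_schema.a, kind FULL); SELECT 1 AS id"
)

config = Config(
    model_defaults=ModelDefaultsConfig(dialect="duckdb"),
    # Executed once before / after all model evaluations of a plan or run.
    before_all=["CREATE TABLE IF NOT EXISTS before_marker (col INT)"],
    after_all=["CREATE TABLE IF NOT EXISTS after_marker (col INT)"],
)

ctx = Context(paths=[project_dir], config=config)
ctx.plan(auto_apply=True, no_prompts=True)

# Statements are persisted in state keyed by project, which is why planning
# with a single repo leaves the other repo's entries untouched.
print(ctx.state_reader.get_environment_statements("prod"))
```

Because the statements are keyed by project in state, planning with only one repo rewrites that repo's statements while preserving the other project's entries, which the next test verifies.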
- -@use_terminal_console -def test_multi_repo_single_project_environment_statements_update(copy_to_temp_path): - paths = copy_to_temp_path("examples/multi") - repo_1_path = f"{paths[0]}/repo_1" - repo_2_path = f"{paths[0]}/repo_2" - - context = Context(paths=[repo_1_path, repo_2_path], gateway="memory") - context._new_state_sync().reset(default_catalog=context.default_catalog) - - initial_plan = context.plan_builder().build() - context.apply(initial_plan) - - # Get initial statements - initial_statements = context.state_reader.get_environment_statements(c.PROD) - assert len(initial_statements) == 2 - - # Modify repo_1's config to add a new before_all statement - repo_1_config_path = f"{repo_1_path}/config.yaml" - with open(repo_1_config_path, "r") as f: - config_content = f.read() - - # Add a new before_all statement to repo_1 only - modified_config = config_content.replace( - "CREATE TABLE IF NOT EXISTS before_1 AS select @one()", - "CREATE TABLE IF NOT EXISTS before_1 AS select @one()\n - CREATE TABLE IF NOT EXISTS before_1_modified AS select 999", - ) - - with open(repo_1_config_path, "w") as f: - f.write(modified_config) - - # Create new context with modified config but only for repo_1 - context_repo_1_only = Context( - paths=[repo_1_path], state_sync=context.state_sync, gateway="memory" - ) - - # Plan with only repo_1; this should preserve repo_2's statements from state - repo_1_plan = context_repo_1_only.plan_builder(environment="dev").build() - context_repo_1_only.apply(repo_1_plan) - updated_statements = context_repo_1_only.state_reader.get_environment_statements("dev") - - # Should still have statements from both projects - assert len(updated_statements) == 2 - - # Sort by project - sorted_updated = sorted(updated_statements, key=lambda es: es.project or "") - - # Verify repo_1 has the new statement - repo_1_updated = sorted_updated[0] - assert repo_1_updated.project == "repo_1" - assert len(repo_1_updated.before_all) == 2 - assert "CREATE TABLE IF NOT EXISTS before_1_modified" in repo_1_updated.before_all[1] - - # Verify repo_2 statements are preserved from state - repo_2_preserved = sorted_updated[1] - assert repo_2_preserved.project == "repo_2" - assert len(repo_2_preserved.before_all) == 1 - assert "CREATE TABLE IF NOT EXISTS before_2" in repo_2_preserved.before_all[0] - assert "CREATE TABLE IF NOT EXISTS after_2 AS select @dup()" in repo_2_preserved.after_all[0] - - -@use_terminal_console -def test_multi_virtual_layer(copy_to_temp_path): - paths = copy_to_temp_path("tests/fixtures/multi_virtual_layer") - path = Path(paths[0]) - first_db_path = str(path / "db_1.db") - second_db_path = str(path / "db_2.db") - - config = Config( - gateways={ - "first": GatewayConfig( - connection=DuckDBConnectionConfig(database=first_db_path), - variables={"overriden_var": "gateway_1"}, - ), - "second": GatewayConfig( - connection=DuckDBConnectionConfig(database=second_db_path), - variables={"overriden_var": "gateway_2"}, - ), - }, - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - model_naming=NameInferenceConfig(infer_names=True), - default_gateway="first", - gateway_managed_virtual_layer=True, - variables={"overriden_var": "global", "global_one": 88}, - ) - - context = Context(paths=paths, config=config) - assert context.default_catalog_per_gateway == {"first": "db_1", "second": "db_2"} - assert len(context.engine_adapters) == 2 - - # For the model without a gateway the default should be used and the gateway variable should override the global - assert ( - 
context.render("first_schema.model_one").sql() - == 'SELECT \'gateway_1\' AS "item_id", 88 AS "global_one", 1 AS "macro_one"' - ) - - # For model with gateway specified the appropriate variable should be used to overide - assert ( - context.render("db_2.second_schema.model_one").sql() - == 'SELECT \'gateway_2\' AS "item_id", 88 AS "global_one", 1 AS "macro_one"' - ) - - plan = context.plan_builder().build() - assert len(plan.new_snapshots) == 4 - context.apply(plan) - - # Validate the tables that source from the first tables are correct as well with evaluate - assert ( - context.evaluate( - "first_schema.model_two", start=now(), end=now(), execution_time=now() - ).to_string() - == " item_id global_one\n0 gateway_1 88" - ) - assert ( - context.evaluate( - "db_2.second_schema.model_two", start=now(), end=now(), execution_time=now() - ).to_string() - == " item_id global_one\n0 gateway_2 88" - ) - - assert sorted(set(snapshot.name for snapshot in plan.directly_modified)) == [ - '"db_1"."first_schema"."model_one"', - '"db_1"."first_schema"."model_two"', - '"db_2"."second_schema"."model_one"', - '"db_2"."second_schema"."model_two"', - ] - - model = context.get_model("db_1.first_schema.model_one") - - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'c' AS extra").sql(dialect=model.dialect) - ) - } - ) - ) - plan = context.plan_builder().build() - context.apply(plan) - - state_environments = context.state_reader.get_environments() - state_snapshots = context.state_reader.get_snapshots(context.snapshots.values()) - - assert state_environments[0].gateway_managed - assert len(state_snapshots) == len(state_environments[0].snapshots) - assert [snapshot.name for snapshot in plan.directly_modified] == [ - '"db_1"."first_schema"."model_one"' - ] - assert [x.name for x in list(plan.indirectly_modified.values())[0]] == [ - '"db_1"."first_schema"."model_two"' - ] - - assert len(plan.missing_intervals) == 1 - assert ( - context.evaluate( - "db_1.first_schema.model_one", start=now(), end=now(), execution_time=now() - ).to_string() - == " item_id global_one macro_one extra\n0 gateway_1 88 1 c" - ) - - # Create dev environment with changed models - model = context.get_model("db_2.second_schema.model_one") - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'d' AS extra").sql(dialect=model.dialect) - ) - } - ) - ) - model = context.get_model("first_schema.model_two") - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'d2' AS col").sql(dialect=model.dialect) - ) - } - ) - ) - plan = context.plan_builder("dev").build() - context.apply(plan) - - dev_environment = context.state_sync.get_environment("dev") - assert dev_environment is not None - - metadata_engine_1 = DuckDBMetadata.from_context(context) - start_schemas_1 = set(metadata_engine_1.schemas) - assert sorted(start_schemas_1) == sorted( - {"first_schema__dev", "sqlmesh", "first_schema", "sqlmesh__first_schema"} - ) - - metadata_engine_2 = DuckDBMetadata(context._get_engine_adapter("second")) - start_schemas_2 = set(metadata_engine_2.schemas) - assert sorted(start_schemas_2) == sorted( - {"sqlmesh__second_schema", "second_schema", "second_schema__dev"} - ) - - # Invalidate dev environment - context.invalidate_environment("dev") - invalidate_environment = context.state_sync.get_environment("dev") - assert invalidate_environment is not None - assert invalidate_environment.expiration_ts < dev_environment.expiration_ts 
# type: ignore - assert sorted(start_schemas_1) == sorted(set(metadata_engine_1.schemas)) - assert sorted(start_schemas_2) == sorted(set(metadata_engine_2.schemas)) - - # Run janitor - context._run_janitor() - assert context.state_sync.get_environment("dev") is None - removed_schemas = start_schemas_1 - set(metadata_engine_1.schemas) - assert removed_schemas == {"first_schema__dev"} - removed_schemas = start_schemas_2 - set(metadata_engine_2.schemas) - assert removed_schemas == {"second_schema__dev"} - prod_environment = context.state_sync.get_environment("prod") - - # Remove the second gateway's second model and apply plan - second_model = path / "models/second_schema/model_two.sql" - os.remove(second_model) - assert not second_model.exists() - context = Context(paths=paths, config=config) - plan = context.plan_builder().build() - context.apply(plan) - prod_environment = context.state_sync.get_environment("prod") - assert len(prod_environment.snapshots_) == 3 - - # Changing the flag should show a diff - context.config.gateway_managed_virtual_layer = False - plan = context.plan_builder().build() - assert not plan.requires_backfill - assert ( - plan.context_diff.previous_gateway_managed_virtual_layer - != plan.context_diff.gateway_managed_virtual_layer - ) - assert plan.context_diff.has_changes - - # This should error since the default_gateway won't have access to create the view on a non-shared catalog - with pytest.raises(NodeExecutionFailedError, match=r"Execution failed for node SnapshotId*"): - context.apply(plan) - - -def test_multi_dbt(mocker): - context = Context(paths=["examples/multi_dbt/bronze", "examples/multi_dbt/silver"]) - context._new_state_sync().reset(default_catalog=context.default_catalog) - plan = context.plan_builder().build() - assert len(plan.new_snapshots) == 4 - context.apply(plan) - validate_apply_basics(context, c.PROD, plan.snapshots.values()) - - environment_statements = context.state_sync.get_environment_statements(c.PROD) - assert len(environment_statements) == 2 - bronze_statements = environment_statements[0] - assert bronze_statements.before_all == [ - "JINJA_STATEMENT_BEGIN;\nCREATE TABLE IF NOT EXISTS analytic_stats (physical_table VARCHAR, evaluation_time VARCHAR);\nJINJA_END;" - ] - assert not bronze_statements.after_all - silver_statements = environment_statements[1] - assert not silver_statements.before_all - assert silver_statements.after_all == [ - "JINJA_STATEMENT_BEGIN;\n{{ store_schemas(schemas) }}\nJINJA_END;" - ] - assert "store_schemas" in silver_statements.jinja_macros.root_macros - analytics_table = context.fetchdf("select * from analytic_stats;") - assert sorted(analytics_table.columns) == sorted(["physical_table", "evaluation_time"]) - schema_table = context.fetchdf("select * from schema_table;") - assert sorted(schema_table.all_schemas[0]) == sorted(["bronze", "silver"]) - - -def test_multi_hybrid(mocker): - context = Context( - paths=["examples/multi_hybrid/dbt_repo", "examples/multi_hybrid/sqlmesh_repo"] - ) - context._new_state_sync().reset(default_catalog=context.default_catalog) - plan = context.plan_builder().build() - - assert len(plan.new_snapshots) == 5 - assert context.dag.roots == {'"memory"."dbt_repo"."e"'} - assert context.dag.graph['"memory"."dbt_repo"."c"'] == {'"memory"."sqlmesh_repo"."b"'} - assert context.dag.graph['"memory"."sqlmesh_repo"."b"'] == {'"memory"."sqlmesh_repo"."a"'} - assert context.dag.graph['"memory"."sqlmesh_repo"."a"'] == {'"memory"."dbt_repo"."e"'} - assert 
context.dag.downstream('"memory"."dbt_repo"."e"') == [ - '"memory"."sqlmesh_repo"."a"', - '"memory"."sqlmesh_repo"."b"', - '"memory"."dbt_repo"."c"', - '"memory"."dbt_repo"."d"', - ] - - sqlmesh_model_a = context.get_model("sqlmesh_repo.a") - dbt_model_c = context.get_model("dbt_repo.c") - assert sqlmesh_model_a.project == "sqlmesh_repo" - - sqlmesh_rendered = ( - 'SELECT "e"."col_a" AS "col_a", "e"."col_b" AS "col_b" FROM "memory"."dbt_repo"."e" AS "e"' - ) - dbt_rendered = 'SELECT DISTINCT ROUND(CAST(("b"."col_a" / NULLIF(100, 0)) AS DECIMAL(16, 2)), 2) AS "rounded_col_a" FROM "memory"."sqlmesh_repo"."b" AS "b"' - assert sqlmesh_model_a.render_query().sql() == sqlmesh_rendered - assert dbt_model_c.render_query().sql() == dbt_rendered - - context.apply(plan) - validate_apply_basics(context, c.PROD, plan.snapshots.values()) - - -def test_incremental_time_self_reference( - mocker: MockerFixture, sushi_context: Context, sushi_data_validator: SushiDataValidator -): - start_ts = to_timestamp("1 week ago") - start_date, end_date = to_date("1 week ago"), to_date("yesterday") - if to_timestamp(start_date) < start_ts: - # The start date must be aligned by the interval unit. - start_date += timedelta(days=1) - - df = sushi_context.engine_adapter.fetchdf( - "SELECT MIN(event_date) FROM sushi.customer_revenue_lifetime" - ) - assert df.iloc[0, 0] == pd.to_datetime(start_date) - df = sushi_context.engine_adapter.fetchdf( - "SELECT MAX(event_date) FROM sushi.customer_revenue_lifetime" - ) - assert df.iloc[0, 0] == pd.to_datetime(end_date) - results = sushi_data_validator.validate("sushi.customer_revenue_lifetime", start_date, end_date) - plan = sushi_context.plan_builder( - restate_models=["sushi.customer_revenue_lifetime", "sushi.customer_revenue_by_day"], - start=start_date, - end="5 days ago", - ).build() - revenue_lifetime_snapshot = sushi_context.get_snapshot( - "sushi.customer_revenue_lifetime", raise_if_missing=True - ) - revenue_by_day_snapshot = sushi_context.get_snapshot( - "sushi.customer_revenue_by_day", raise_if_missing=True - ) - assert sorted(plan.missing_intervals, key=lambda x: x.snapshot_id) == sorted( - [ - SnapshotIntervals( - snapshot_id=revenue_lifetime_snapshot.snapshot_id, - intervals=[ - (to_timestamp(to_date("7 days ago")), to_timestamp(to_date("6 days ago"))), - (to_timestamp(to_date("6 days ago")), to_timestamp(to_date("5 days ago"))), - (to_timestamp(to_date("5 days ago")), to_timestamp(to_date("4 days ago"))), - (to_timestamp(to_date("4 days ago")), to_timestamp(to_date("3 days ago"))), - (to_timestamp(to_date("3 days ago")), to_timestamp(to_date("2 days ago"))), - (to_timestamp(to_date("2 days ago")), to_timestamp(to_date("1 days ago"))), - (to_timestamp(to_date("1 day ago")), to_timestamp(to_date("today"))), - ], - ), - SnapshotIntervals( - snapshot_id=revenue_by_day_snapshot.snapshot_id, - intervals=[ - (to_timestamp(to_date("7 days ago")), to_timestamp(to_date("6 days ago"))), - (to_timestamp(to_date("6 days ago")), to_timestamp(to_date("5 days ago"))), - ], - ), - ], - key=lambda x: x.snapshot_id, - ) - sushi_context.console = mocker.Mock(spec=Console) - sushi_context.apply(plan) - num_batch_calls = Counter( - [x[0][0] for x in sushi_context.console.update_snapshot_evaluation_progress.call_args_list] # type: ignore - ) - # Validate that we made 7 calls to the customer_revenue_lifetime snapshot and 1 call to the customer_revenue_by_day snapshot - assert num_batch_calls == { - sushi_context.get_snapshot("sushi.customer_revenue_lifetime", raise_if_missing=True): 7, - 
sushi_context.get_snapshot("sushi.customer_revenue_by_day", raise_if_missing=True): 1, - } - # Validate that the results are the same as before the restate - assert results == sushi_data_validator.validate( - "sushi.customer_revenue_lifetime", start_date, end_date - ) - - -def test_invalidating_environment(sushi_context: Context): - apply_to_environment(sushi_context, "dev") - start_environment = sushi_context.state_sync.get_environment("dev") - assert start_environment is not None - metadata = DuckDBMetadata.from_context(sushi_context) - start_schemas = set(metadata.schemas) - assert "sushi__dev" in start_schemas - sushi_context.invalidate_environment("dev") - invalidate_environment = sushi_context.state_sync.get_environment("dev") - assert invalidate_environment is not None - schemas_prior_to_janitor = set(metadata.schemas) - assert invalidate_environment.expiration_ts < start_environment.expiration_ts # type: ignore - assert start_schemas == schemas_prior_to_janitor - sushi_context._run_janitor() - schemas_after_janitor = set(metadata.schemas) - assert sushi_context.state_sync.get_environment("dev") is None - assert start_schemas - schemas_after_janitor == {"sushi__dev"} - - -def test_environment_suffix_target_table(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context( - "examples/sushi", config="environment_suffix_table_config" - ) - context.apply(plan) - metadata = DuckDBMetadata.from_context(context) - environments_schemas = {"sushi"} - internal_schemas = {"sqlmesh", "sqlmesh__sushi"} - starting_schemas = environments_schemas | internal_schemas - # Make sure no new schemas are created - assert set(metadata.schemas) - starting_schemas == {"raw"} - prod_views = {x for x in metadata.qualified_views if x.db in environments_schemas} - # Make sure that all models are present - assert len(prod_views) == 16 - apply_to_environment(context, "dev") - # Make sure no new schemas are created - assert set(metadata.schemas) - starting_schemas == {"raw"} - dev_views = { - x for x in metadata.qualified_views if x.db in environments_schemas and "__dev" in x.name - } - # Make sure that there is a view with `__dev` for each view that exists in prod - assert len(dev_views) == len(prod_views) - assert {x.name.replace("__dev", "") for x in dev_views} - {x.name for x in prod_views} == set() - context.invalidate_environment("dev") - context._run_janitor() - views_after_janitor = metadata.qualified_views - # Make sure that the number of views after the janitor is the same as when you subtract away dev views - assert len(views_after_janitor) == len( - {x.sql(dialect="duckdb") for x in views_after_janitor} - - {x.sql(dialect="duckdb") for x in dev_views} - ) - # Double check there are no dev views - assert len({x for x in views_after_janitor if "__dev" in x.name}) == 0 - # Make sure prod views were not removed - assert {x.sql(dialect="duckdb") for x in prod_views} - { - x.sql(dialect="duckdb") for x in views_after_janitor - } == set() - - -def test_environment_suffix_target_catalog(tmp_path: Path, monkeypatch: MonkeyPatch) -> None: - monkeypatch.chdir(tmp_path) - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(catalogs={"main_warehouse": ":memory:"}), - environment_suffix_target=EnvironmentSuffixTarget.CATALOG, - ) - - assert config.default_connection - - models_dir = tmp_path / "models" - models_dir.mkdir() - - (models_dir / "model.sql").write_text(""" - MODEL ( - name example_schema.test_model, - kind FULL - ); - - 
SELECT '1' as a""") - - (models_dir / "fqn_model.sql").write_text(""" - MODEL ( - name memory.example_fqn_schema.test_model_fqn, - kind FULL - ); - - SELECT '1' as a""") - - ctx = Context(config=config, paths=tmp_path) - - metadata = DuckDBMetadata.from_context(ctx) - assert ctx.default_catalog == "main_warehouse" - assert metadata.catalogs == {"main_warehouse", "memory"} - - ctx.plan(auto_apply=True) - - # prod should go to the default catalog and not be overridden to a catalog called 'prod' - assert ( - ctx.engine_adapter.fetchone("select * from main_warehouse.example_schema.test_model")[0] # type: ignore - == "1" - ) - assert ( - ctx.engine_adapter.fetchone("select * from memory.example_fqn_schema.test_model_fqn")[0] # type: ignore - == "1" - ) - assert metadata.catalogs == {"main_warehouse", "memory"} - assert metadata.schemas_in_catalog("main_warehouse") == [ - "example_schema", - "sqlmesh__example_schema", - ] - assert metadata.schemas_in_catalog("memory") == [ - "example_fqn_schema", - "sqlmesh__example_fqn_schema", - ] - - # dev should be overridden to go to a catalogs called 'main_warehouse__dev' and 'memory__dev' - ctx.plan(environment="dev", include_unmodified=True, auto_apply=True) - assert ( - ctx.engine_adapter.fetchone("select * from main_warehouse__dev.example_schema.test_model")[ - 0 - ] # type: ignore - == "1" - ) - assert ( - ctx.engine_adapter.fetchone("select * from memory__dev.example_fqn_schema.test_model_fqn")[ - 0 - ] # type: ignore - == "1" - ) - assert metadata.catalogs == {"main_warehouse", "main_warehouse__dev", "memory", "memory__dev"} - - # schemas in dev envs should match prod and not have a suffix - assert metadata.schemas_in_catalog("main_warehouse") == [ - "example_schema", - "sqlmesh__example_schema", - ] - assert metadata.schemas_in_catalog("main_warehouse__dev") == ["example_schema"] - assert metadata.schemas_in_catalog("memory") == [ - "example_fqn_schema", - "sqlmesh__example_fqn_schema", - ] - assert metadata.schemas_in_catalog("memory__dev") == ["example_fqn_schema"] - - ctx.invalidate_environment("dev", sync=True) - - # dev catalogs cleaned up - assert metadata.catalogs == {"main_warehouse", "memory"} - - # prod catalogs still contain physical layer and views still work - assert metadata.schemas_in_catalog("main_warehouse") == [ - "example_schema", - "sqlmesh__example_schema", - ] - assert metadata.schemas_in_catalog("memory") == [ - "example_fqn_schema", - "sqlmesh__example_fqn_schema", - ] - - assert ( - ctx.engine_adapter.fetchone("select * from main_warehouse.example_schema.test_model")[0] # type: ignore - == "1" - ) - assert ( - ctx.engine_adapter.fetchone("select * from memory.example_fqn_schema.test_model_fqn")[0] # type: ignore - == "1" - ) - - -def test_environment_catalog_mapping(init_and_plan_context: t.Callable): - environments_schemas = {"raw", "sushi"} - - def get_prod_dev_views(metadata: DuckDBMetadata) -> t.Tuple[t.Set[exp.Table], t.Set[exp.Table]]: - views = metadata.qualified_views - prod_views = { - x for x in views if x.catalog == "prod_catalog" if x.db in environments_schemas - } - dev_views = {x for x in views if x.catalog == "dev_catalog" if x.db in environments_schemas} - return prod_views, dev_views - - def get_default_catalog_and_non_tables( - metadata: DuckDBMetadata, default_catalog: t.Optional[str] - ) -> t.Tuple[t.Set[exp.Table], t.Set[exp.Table]]: - tables = metadata.qualified_tables - user_default_tables = { - x for x in tables if x.catalog == default_catalog and x.db != "sqlmesh" - } - non_default_tables = {x for x 
in tables if x.catalog != default_catalog} - return user_default_tables, non_default_tables - - context, plan = init_and_plan_context( - "examples/sushi", config="environment_catalog_mapping_config" - ) - context.apply(plan) - metadata = DuckDBMetadata(context.engine_adapter) - state_metadata = DuckDBMetadata.from_context(context.state_sync.state_sync) - prod_views, dev_views = get_prod_dev_views(metadata) - ( - user_default_tables, - non_default_tables, - ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) - assert len(prod_views) == 16 - assert len(dev_views) == 0 - assert len(user_default_tables) == 15 - assert state_metadata.schemas == ["sqlmesh"] - assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( - { - "physical.sqlmesh._environments", - "physical.sqlmesh._intervals", - "physical.sqlmesh._snapshots", - "physical.sqlmesh._versions", - } - ) - apply_to_environment(context, "dev") - prod_views, dev_views = get_prod_dev_views(metadata) - ( - user_default_tables, - non_default_tables, - ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) - assert len(prod_views) == 16 - assert len(dev_views) == 16 - assert len(user_default_tables) == 16 - assert len(non_default_tables) == 0 - assert state_metadata.schemas == ["sqlmesh"] - assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( - { - "physical.sqlmesh._environments", - "physical.sqlmesh._intervals", - "physical.sqlmesh._snapshots", - "physical.sqlmesh._versions", - } - ) - apply_to_environment(context, "prodnot") - prod_views, dev_views = get_prod_dev_views(metadata) - ( - user_default_tables, - non_default_tables, - ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) - assert len(prod_views) == 16 - assert len(dev_views) == 32 - assert len(user_default_tables) == 16 - assert len(non_default_tables) == 0 - assert state_metadata.schemas == ["sqlmesh"] - assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( - { - "physical.sqlmesh._environments", - "physical.sqlmesh._intervals", - "physical.sqlmesh._snapshots", - "physical.sqlmesh._versions", - } - ) - context.invalidate_environment("dev") - context._run_janitor() - prod_views, dev_views = get_prod_dev_views(metadata) - ( - user_default_tables, - non_default_tables, - ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) - assert len(prod_views) == 16 - assert len(dev_views) == 16 - assert len(user_default_tables) == 16 - assert len(non_default_tables) == 0 - assert state_metadata.schemas == ["sqlmesh"] - assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( - { - "physical.sqlmesh._environments", - "physical.sqlmesh._intervals", - "physical.sqlmesh._snapshots", - "physical.sqlmesh._versions", - } - ) - - -@pytest.mark.parametrize( - "context_fixture", - ["sushi_context", "sushi_no_default_catalog"], -) -def test_unaligned_start_snapshots(context_fixture: Context, request): - context = request.getfixturevalue(context_fixture) - environment = "dev" - apply_to_environment(context, environment) - # Make breaking change to model upstream of a depends_on_self model - context.upsert_model("sushi.order_items", stamp="1") - # Apply the change starting at a date later than the beginning of the downstream depends_on_self model - plan = apply_to_environment( - context, - environment, - choice=SnapshotChangeCategory.BREAKING, - plan_start="2 days ago", - enable_preview=True, - ) - revenue_lifetime_snapshot = context.get_snapshot( - 
"sushi.customer_revenue_lifetime", raise_if_missing=True - ) - # Validate that the depends_on_self model is non-deployable - assert not plan.deployability_index.is_deployable(revenue_lifetime_snapshot) - - -class OldPythonModel(PythonModel): - kind: ModelKind = ViewKind() - - -def test_python_model_default_kind_change(init_and_plan_context: t.Callable): - """ - Around 2024-07-17 Python models had their default Kind changed from VIEW to FULL in order to - avoid some edge cases where the views might not get updated in certain situations. - - This test ensures that if a user had a Python `kind: VIEW` model stored in state, - it can still be loaded without error and just show as a breaking change from `kind: VIEW` - to `kind: FULL` - """ - - # note: we deliberately dont specify a Kind here to allow the defaults to be picked up - python_model_file = """import typing as t -import pandas as pd # noqa: TID253 -from sqlmesh import ExecutionContext, model - -@model( - "sushi.python_view_model", - columns={ - "id": "int", - } -) -def execute( - context: ExecutionContext, - **kwargs: t.Any, -) -> pd.DataFrame: - return pd.DataFrame([ - {"id": 1} - ]) -""" - - context: Context - context, _ = init_and_plan_context("examples/sushi") - - with open(context.path / "models" / "python_view_model.py", mode="w", encoding="utf8") as f: - f.write(python_model_file) - - # monkey-patch PythonModel to default to kind: View again - # and ViewKind to allow python models again - with ( - mock.patch.object(ViewKind, "supports_python_models", return_value=True), - mock.patch("sqlmesh.core.model.definition.PythonModel", OldPythonModel), - ): - context.load() - - # check the monkey-patching worked - model = context.get_model("sushi.python_view_model") - assert model.kind.name == ModelKindName.VIEW - assert model.source_type == "python" - - # apply plan - plan: Plan = context.plan(auto_apply=True) - - # check that run() still works even though we have a Python model with kind: View in the state - snapshot_ids = [s for s in plan.directly_modified if "python_view_model" in s.name] - snapshot_from_state = list(context.state_sync.get_snapshots(snapshot_ids).values())[0] - assert snapshot_from_state.model.kind.name == ModelKindName.VIEW - assert snapshot_from_state.model.source_type == "python" - context.run() - - # reload context to load model with new defaults - # this also shows the earlier monkey-patching is no longer in effect - context.load() - model = context.get_model("sushi.python_view_model") - assert model.kind.name == ModelKindName.FULL - assert model.source_type == "python" - - plan = context.plan( - categorizer_config=CategorizerConfig.all_full() - ) # the default categorizer_config doesnt auto-categorize python models - - assert plan.has_changes - assert not plan.indirectly_modified - - assert len(plan.directly_modified) == 1 - snapshot_id = list(plan.directly_modified)[0] - assert snapshot_id.name == '"memory"."sushi"."python_view_model"' - assert plan.modified_snapshots[snapshot_id].change_category == SnapshotChangeCategory.BREAKING - - context.apply(plan) - - df = context.engine_adapter.fetchdf("SELECT id FROM sushi.python_view_model") - assert df["id"].to_list() == [1] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_restatement_of_full_model_with_start(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - restatement_plan = context.plan( - restate_models=["sushi.customers"], - start="2023-01-07", - auto_apply=True, - 
no_prompts=True, - ) - - sushi_customer_interval = restatement_plan.restatements[ - context.get_snapshot("sushi.customers").snapshot_id - ] - assert sushi_customer_interval == (to_timestamp("2023-01-01"), to_timestamp("2023-01-09")) - waiter_by_day_interval = restatement_plan.restatements[ - context.get_snapshot("sushi.waiter_as_customer_by_day").snapshot_id - ] - assert waiter_by_day_interval == (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_restatement_should_not_override_environment_statements(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - context.config.before_all = ["SELECT 'test_before_all';", *context.config.before_all] - context.load() - - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - - prod_env_statements = context.state_reader.get_environment_statements(c.PROD) - assert prod_env_statements[0].before_all[0] == "SELECT 'test_before_all';" - - context.plan( - restate_models=["sushi.waiter_revenue_by_day"], - start="2023-01-07", - auto_apply=True, - no_prompts=True, - ) - - prod_env_statements = context.state_reader.get_environment_statements(c.PROD) - assert prod_env_statements[0].before_all[0] == "SELECT 'test_before_all';" - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_restatement_shouldnt_backfill_beyond_prod_intervals(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - model = context.get_model("sushi.top_waiters") - context.upsert_model(SqlModel.parse_obj({**model.dict(), "cron": "@hourly"})) - - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - context.run() - - with time_machine.travel("2023-01-09 02:00:00 UTC"): - # It's time to backfill the waiter_revenue_by_day model but it hasn't run yet - restatement_plan = context.plan( - restate_models=["sushi.waiter_revenue_by_day"], - no_prompts=True, - skip_tests=True, - ) - intervals_by_id = {i.snapshot_id: i for i in restatement_plan.missing_intervals} - # Make sure the intervals don't go beyond the prod intervals - assert intervals_by_id[context.get_snapshot("sushi.top_waiters").snapshot_id].intervals[-1][ - 1 - ] == to_timestamp("2023-01-08 15:00:00 UTC") - assert intervals_by_id[ - context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id - ].intervals[-1][1] == to_timestamp("2023-01-08 00:00:00 UTC") - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -@use_terminal_console -def test_audit_only_metadata_change(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Add a new audit - model = context.get_model("sushi.waiter_revenue_by_day") - audits = model.audits.copy() - audits.append(("number_of_rows", {"threshold": exp.Literal.number(1)})) - model = model.copy(update={"audits": audits}) - context.upsert_model(model) - - plan = context.plan_builder("prod", skip_tests=True).build() - assert len(plan.new_snapshots) == 2 - assert all(s.change_category.is_metadata for s in plan.new_snapshots) - assert not plan.missing_intervals - - with capture_output() as output: - context.apply(plan) - - assert "Auditing models" in output.stdout - assert model.name in output.stdout - - -def initial_add(context: Context, environment: str): - assert not context.state_reader.get_environment(environment) - - plan = context.plan(environment, start=start(context), create_from="nonexistent_env") - validate_plan_changes(plan, added={x.snapshot_id for x in 
context.snapshots.values()}) - - context.apply(plan) - validate_apply_basics(context, environment, plan.snapshots.values()) - - -def test_plan_production_environment_statements(tmp_path: Path): - model_a = """ - MODEL ( - name test_schema.a, - kind FULL, - ); - - @IF( - @runtime_stage IN ('evaluating', 'creating'), - INSERT INTO schema_names_for_prod (physical_schema_name) VALUES (@resolve_template('@{schema_name}')) - ); - - SELECT 1 AS account_id - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - for path, defn in {"a.sql": model_a}.items(): - with open(models_dir / path, "w") as f: - f.write(defn) - - before_all = [ - "CREATE TABLE IF NOT EXISTS schema_names_for_@this_env (physical_schema_name VARCHAR)", - "@IF(@runtime_stage = 'before_all', CREATE TABLE IF NOT EXISTS should_create AS SELECT @runtime_stage)", - ] - after_all = [ - "@IF(@this_env = 'prod', CREATE TABLE IF NOT EXISTS after_t AS SELECT @var_5)", - "@IF(@runtime_stage = 'before_all', CREATE TABLE IF NOT EXISTS not_create AS SELECT @runtime_stage)", - ] - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - before_all=before_all, - after_all=after_all, - variables={"var_5": 5}, - ) - ctx = Context(paths=[tmp_path], config=config) - ctx.plan(auto_apply=True, no_prompts=True) - - before_t = ctx.fetchdf("select * from schema_names_for_prod").to_dict() - assert before_t["physical_schema_name"][0] == "sqlmesh__test_schema" - - after_t = ctx.fetchdf("select * from after_t").to_dict() - assert after_t["5"][0] == 5 - - environment_statements = ctx.state_reader.get_environment_statements(c.PROD) - assert environment_statements[0].before_all == before_all - assert environment_statements[0].after_all == after_all - assert environment_statements[0].python_env.keys() == {"__sqlmesh__vars__"} - assert environment_statements[0].python_env["__sqlmesh__vars__"].payload == "{'var_5': 5}" - - should_create = ctx.fetchdf("select * from should_create").to_dict() - assert should_create["before_all"][0] == "before_all" - - with pytest.raises( - Exception, match=r"Catalog Error: Table with name not_create does not exist!" 
- ): - ctx.fetchdf("select * from not_create") - - -def test_environment_statements_error_handling(tmp_path: Path): - model_a = """ - MODEL ( - name test_schema.a, - kind FULL, - ); - - SELECT 1 AS account_id - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - for path, defn in {"a.sql": model_a}.items(): - with open(models_dir / path, "w") as f: - f.write(defn) - - before_all = [ - "CREATE TABLE identical_table (physical_schema_name VARCHAR)", - "CREATE TABLE identical_table (physical_schema_name VARCHAR)", - ] - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - before_all=before_all, - ) - ctx = Context(paths=[tmp_path], config=config) - - expected_error_message = re.escape( - """An error occurred during execution of the following 'before_all' statement: - -CREATE TABLE identical_table (physical_schema_name TEXT) - -Catalog Error: Table with name "identical_table" already exists!""" - ) - - with pytest.raises(SQLMeshError, match=expected_error_message): - ctx.plan(auto_apply=True, no_prompts=True) - - after_all = [ - "@bad_macro()", - ] - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - after_all=after_all, - ) - ctx = Context(paths=[tmp_path], config=config) - - expected_error_message = re.escape( - """An error occurred during rendering of the 'after_all' statements: - -Failed to resolve macros for - -@bad_macro() - -Macro 'bad_macro' does not exist.""" - ) - - with pytest.raises(SQLMeshError, match=expected_error_message): - ctx.plan(auto_apply=True, no_prompts=True) - - -def test_before_all_after_all_execution_order(tmp_path: Path, mocker: MockerFixture): - model = """ - MODEL ( - name test_schema.model_that_depends_on_before_all, - kind FULL, - ); - - SELECT id, value FROM before_all_created_table - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - with open(models_dir / "model.sql", "w") as f: - f.write(model) - - # before_all statement that creates a table that the above model depends on - before_all_statement = ( - "CREATE TABLE IF NOT EXISTS before_all_created_table AS SELECT 1 AS id, 'test' AS value" - ) - - # after_all that depends on the model - after_all_statement = "CREATE TABLE IF NOT EXISTS after_all_created_table AS SELECT id, value FROM test_schema.model_that_depends_on_before_all" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - before_all=[before_all_statement], - after_all=[after_all_statement], - ) - - execute_calls: t.List[str] = [] - - original_duckdb_execute = DuckDBEngineAdapter.execute - - def track_duckdb_execute(self, expression, **kwargs): - sql = expression if isinstance(expression, str) else expression.sql(dialect="duckdb") - state_tables = [ - "_snapshots", - "_environments", - "_versions", - "_intervals", - "_auto_restatements", - "_environment_statements", - ] - - # to ignore the state queries - if not any(table in sql.lower() for table in state_tables): - execute_calls.append(sql) - - return original_duckdb_execute(self, expression, **kwargs) - - ctx = Context(paths=[tmp_path], config=config) - - # the plan would fail if the execution order ever changes and before_all statements don't execute first - ctx.plan(auto_apply=True, no_prompts=True) - - mocker.patch.object(DuckDBEngineAdapter, "execute", track_duckdb_execute) - - # run with the patched execute - ctx.run("prod", start="2023-01-01", end="2023-01-02") - - # validate explicitly that the first execute is for the before_all - assert "before_all_created_table" in execute_calls[0] - - # 
and that the last is the sole after_all statement that depends on the model - assert "after_all_created_table" in execute_calls[-1] - - -@time_machine.travel("2025-03-08 00:00:00 UTC") -def test_tz(init_and_plan_context): - context, _ = init_and_plan_context("examples/sushi") - - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model( - SqlModel.parse_obj( - {**model.dict(), "cron_tz": "America/Los_Angeles", "start": "2025-03-07"} - ) - ) - - def assert_intervals(plan, intervals): - assert ( - next( - intervals.intervals - for intervals in plan.missing_intervals - if intervals.snapshot_id.name == model.fqn - ) - == intervals - ) - - plan = context.plan_builder("prod", skip_tests=True).build() - - # we have missing intervals but not waiter_revenue_by_day because it's not midnight Pacific yet - assert plan.missing_intervals - - with pytest.raises(StopIteration): - assert_intervals(plan, []) - - # now we're ready 8AM UTC == midnight PST - with time_machine.travel("2025-03-08 08:00:00 UTC"): - plan = context.plan_builder("prod", skip_tests=True).build() - assert_intervals(plan, [(to_timestamp("2025-03-07"), to_timestamp("2025-03-08"))]) - - with time_machine.travel("2025-03-09 07:00:00 UTC"): - plan = context.plan_builder("prod", skip_tests=True).build() - - assert_intervals( - plan, - [ - (to_timestamp("2025-03-07"), to_timestamp("2025-03-08")), - ], - ) - - with time_machine.travel("2025-03-09 08:00:00 UTC"): - plan = context.plan_builder("prod", skip_tests=True).build() - - assert_intervals( - plan, - [ - (to_timestamp("2025-03-07"), to_timestamp("2025-03-08")), - (to_timestamp("2025-03-08"), to_timestamp("2025-03-09")), - ], - ) - - context.apply(plan) - - plan = context.plan_builder("prod", skip_tests=True).build() - assert not plan.missing_intervals - - -def apply_to_environment( - context: Context, - environment: str, - choice: t.Optional[SnapshotChangeCategory] = None, - plan_validators: t.Optional[t.Iterable[t.Callable]] = None, - apply_validators: t.Optional[t.Iterable[t.Callable]] = None, - plan_start: t.Optional[TimeLike] = None, - allow_destructive_models: t.Optional[t.List[str]] = None, - enable_preview: bool = False, -): - plan_validators = plan_validators or [] - apply_validators = apply_validators or [] - - plan_builder = context.plan_builder( - environment, - start=plan_start or start(context) if environment != c.PROD else None, - forward_only=choice == SnapshotChangeCategory.FORWARD_ONLY, - include_unmodified=True, - allow_destructive_models=allow_destructive_models if allow_destructive_models else [], - enable_preview=enable_preview, - ) - if environment != c.PROD: - plan_builder.set_start(plan_start or start(context)) - - if choice: - if choice == SnapshotChangeCategory.FORWARD_ONLY: - # FORWARD_ONLY is deprecated, fall back to NON_BREAKING to keep the existing tests - choice = SnapshotChangeCategory.NON_BREAKING - plan_choice(plan_builder, choice) - for validator in plan_validators: - validator(context, plan_builder.build()) - - plan = plan_builder.build() - context.apply(plan) - - validate_apply_basics(context, environment, plan.snapshots.values(), plan.deployability_index) - for validator in apply_validators: - validator(context) - return plan - - -def change_data_type( - context: Context, model_name: str, old_type: DataType.Type, new_type: DataType.Type -) -> None: - model = context.get_model(model_name) - assert model is not None - - if isinstance(model, SqlModel): - query = model.query.copy() - data_types = query.find_all(DataType) - for data_type in 
data_types: - if data_type.this == old_type: - data_type.set("this", new_type) - context.upsert_model(model_name, query_=ParsableSql(sql=query.sql(dialect=model.dialect))) - elif model.columns_to_types_ is not None: - for k, v in model.columns_to_types_.items(): - if v.this == old_type: - model.columns_to_types_[k] = DataType.build(new_type) - context.upsert_model(model_name, columns=model.columns_to_types_) - - -def validate_plan_changes( - plan: Plan, - *, - added: t.Optional[t.Iterable[SnapshotId]] = None, - modified: t.Optional[t.Iterable[str]] = None, - removed: t.Optional[t.Iterable[SnapshotId]] = None, -) -> None: - added = added or [] - modified = modified or [] - removed = removed or [] - assert set(added) == plan.context_diff.added - assert set(modified) == set(plan.context_diff.modified_snapshots) - assert set(removed) == set(plan.context_diff.removed_snapshots) - - -def validate_versions_same( - model_names: t.List[str], - versions: t.Dict[str, str], - other_versions: t.Dict[str, str], -) -> None: - for name in model_names: - assert versions[name] == other_versions[name] - - -def validate_versions_different( - model_names: t.List[str], - versions: t.Dict[str, str], - other_versions: t.Dict[str, str], -) -> None: - for name in model_names: - assert versions[name] != other_versions[name] - - -def validate_apply_basics( - context: Context, - environment: str, - snapshots: t.Iterable[Snapshot], - deployability_index: t.Optional[DeployabilityIndex] = None, -) -> None: - validate_snapshots_in_state_sync(snapshots, context) - validate_state_sync_environment(snapshots, environment, context) - validate_tables(snapshots, context, deployability_index) - validate_environment_views(snapshots, environment, context, deployability_index) - - -def validate_snapshots_in_state_sync(snapshots: t.Iterable[Snapshot], context: Context) -> None: - snapshot_infos = map(to_snapshot_info, snapshots) - state_sync_table_infos = map( - to_snapshot_info, context.state_reader.get_snapshots(snapshots).values() - ) - assert set(snapshot_infos) == set(state_sync_table_infos) - - -def validate_state_sync_environment( - snapshots: t.Iterable[Snapshot], env: str, context: Context -) -> None: - environment = context.state_reader.get_environment(env) - assert environment - snapshot_infos = map(to_snapshot_info, snapshots) - environment_table_infos = map(to_snapshot_info, environment.snapshots) - assert set(snapshot_infos) == set(environment_table_infos) - - -def validate_tables( - snapshots: t.Iterable[Snapshot], - context: Context, - deployability_index: t.Optional[DeployabilityIndex] = None, -) -> None: - adapter = context.engine_adapter - deployability_index = deployability_index or DeployabilityIndex.all_deployable() - for snapshot in snapshots: - is_deployable = deployability_index.is_representative(snapshot) - if not snapshot.is_model or snapshot.is_external: - continue - table_should_exist = not snapshot.is_embedded - assert adapter.table_exists(snapshot.table_name(is_deployable)) == table_should_exist - if table_should_exist: - assert select_all(snapshot.table_name(is_deployable), adapter) - - -def validate_environment_views( - snapshots: t.Iterable[Snapshot], - environment: str, - context: Context, - deployability_index: t.Optional[DeployabilityIndex] = None, -) -> None: - adapter = context.engine_adapter - deployability_index = deployability_index or DeployabilityIndex.all_deployable() - for snapshot in snapshots: - is_deployable = deployability_index.is_representative(snapshot) - if not snapshot.is_model 
or snapshot.is_symbolic: - continue - view_name = snapshot.qualified_view_name.for_environment( - EnvironmentNamingInfo.from_environment_catalog_mapping( - context.config.environment_catalog_mapping, - name=environment, - suffix_target=context.config.environment_suffix_target, - ) - ) - - assert adapter.table_exists(view_name) - assert select_all(snapshot.table_name(is_deployable), adapter) == select_all( - view_name, adapter - ) - - -def select_all(table: str, adapter: EngineAdapter) -> t.Iterable: - return adapter.fetchall(f"select * from {table} order by 1") - - -def snapshots_to_versions(snapshots: t.Iterable[Snapshot]) -> t.Dict[str, str]: - return {snapshot.name: snapshot.version or "" for snapshot in snapshots} - - -def to_snapshot_info(snapshot: SnapshotInfoLike) -> SnapshotTableInfo: - return snapshot.table_info - - -def start(context: Context) -> TimeLike: - env = context.state_sync.get_environment("prod") - assert env - return env.start_at - - -def add_projection_to_model(model: SqlModel, literal: bool = True) -> SqlModel: - one_expr = exp.Literal.number(1).as_("one") if literal else exp.column("one") - kwargs = { - **model.dict(), - "query": model.query.select(one_expr), # type: ignore - } - return SqlModel.parse_obj(kwargs) - - -def test_plan_environment_statements_doesnt_cause_extra_diff(tmp_path: Path): - model_a = """ - MODEL ( - name test_schema.a, - kind FULL, - ); - - SELECT 1; - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - (models_dir / "a.sql").write_text(model_a) - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - before_all=["select 1 as before_all"], - after_all=["select 2 as after_all"], - ) - ctx = Context(paths=[tmp_path], config=config) - - # first plan - should apply changes - assert ctx.plan(auto_apply=True, no_prompts=True).has_changes - - # second plan - nothing has changed so should report no changes - assert not ctx.plan(auto_apply=True, no_prompts=True).has_changes - - -def test_janitor_cleanup_order(mocker: MockerFixture, tmp_path: Path): - def setup_scenario(): - models_dir = tmp_path / "models" - - if not models_dir.exists(): - models_dir.mkdir() - - model1_path = models_dir / "model1.sql" - - with open(model1_path, "w") as f: - f.write("MODEL(name test.model1, kind FULL); SELECT 1 AS col") - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - ) - ctx = Context(paths=[tmp_path], config=config) - - ctx.plan("dev", no_prompts=True, auto_apply=True) - - model1_snapshot = ctx.get_snapshot("test.model1") - - # Delete the model file to cause a snapshot expiration - model1_path.unlink() - - ctx.load() - - ctx.plan("dev", no_prompts=True, auto_apply=True) - - # Invalidate the environment to cause an environment cleanup - ctx.invalidate_environment("dev") - - try: - ctx._run_janitor(ignore_ttl=True) - except: - pass - - return ctx, model1_snapshot - - # Case 1: Assume that the snapshot cleanup yields an error, the snapshot records - # should still exist in the state sync so the next janitor can retry - mocker.patch( - "sqlmesh.core.snapshot.evaluator.SnapshotEvaluator.cleanup", - side_effect=Exception("snapshot cleanup error"), - ) - ctx, model1_snapshot = setup_scenario() - - # - Check that the snapshot record exists in the state sync - state_snapshot = ctx.state_sync.state_sync.get_snapshots([model1_snapshot.snapshot_id]) - assert state_snapshot - - # - Run the janitor again, this time it should succeed - mocker.patch("sqlmesh.core.snapshot.evaluator.SnapshotEvaluator.cleanup") - 
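# A minimal sketch (hypothetical helpers, not SQLMesh API) of the cleanup
# ordering this scenario exercises: the janitor deletes a state record only
# after the physical cleanup succeeds, so a failed cleanup leaves the record
# behind for the next pass to retry.
def _janitor_pass(expired_snapshot_ids, cleanup_table, delete_state_record):
    for snapshot_id in expired_snapshot_ids:
        try:
            cleanup_table(snapshot_id)  # drop the physical object; may raise
        except Exception:
            continue  # keep the state record so a later pass can retry
        delete_state_record(snapshot_id)  # delete state only after cleanup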
ctx._run_janitor(ignore_ttl=True) - - # - Check that the snapshot record does not exist in the state sync anymore - state_snapshot = ctx.state_sync.state_sync.get_snapshots([model1_snapshot.snapshot_id]) - assert not state_snapshot - - # Case 2: Assume that the view cleanup yields an error, the environment - record should still exist - mocker.patch( - "sqlmesh.core.context.cleanup_expired_views", side_effect=Exception("view cleanup error") - ) - ctx, model1_snapshot = setup_scenario() - - views = ctx.fetchdf("FROM duckdb_views() SELECT * EXCLUDE(sql) WHERE NOT internal") - assert views.empty - - # - Check that the environment record exists in the state sync - assert ctx.state_sync.get_environment("dev") - - # - Run the janitor again, this time it should succeed - mocker.patch("sqlmesh.core.context.cleanup_expired_views") - ctx._run_janitor(ignore_ttl=True) - - # - Check that the environment record does not exist in the state sync anymore - assert not ctx.state_sync.get_environment("dev") - - -@use_terminal_console -def test_destroy(copy_to_temp_path): - # Testing project with two gateways to verify cleanup is performed across engines - paths = copy_to_temp_path("tests/fixtures/multi_virtual_layer") - path = Path(paths[0]) - first_db_path = str(path / "db_1.db") - second_db_path = str(path / "db_2.db") - - config = Config( - gateways={ - "first": GatewayConfig( - connection=DuckDBConnectionConfig(database=first_db_path), - variables={"overriden_var": "gateway_1"}, - ), - "second": GatewayConfig( - connection=DuckDBConnectionConfig(database=second_db_path), - variables={"overriden_var": "gateway_2"}, - ), - }, - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - model_naming=NameInferenceConfig(infer_names=True), - default_gateway="first", - gateway_managed_virtual_layer=True, - variables={"overriden_var": "global", "global_one": 88}, - ) - - context = Context(paths=paths, config=config) - plan = context.plan_builder().build() - assert len(plan.new_snapshots) == 4 - context.apply(plan) - - # Confirm cache exists - cache_path = Path(path) / ".cache" - assert cache_path.exists() - assert len(list(cache_path.iterdir())) > 0 - - model = context.get_model("db_1.first_schema.model_one") - - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'c' AS extra").sql(dialect=model.dialect) - ) - } - ) - ) - plan = context.plan_builder().build() - context.apply(plan) - - state_environments = context.state_reader.get_environments() - state_snapshots = context.state_reader.get_snapshots(context.snapshots.values()) - - assert len(state_snapshots) == len(state_environments[0].snapshots) - - # Create dev environment with changed models - model = context.get_model("db_2.second_schema.model_one") - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'d' AS extra").sql(dialect=model.dialect) - ) - } - ) - ) - model = context.get_model("first_schema.model_two") - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'d2' AS col").sql(dialect=model.dialect) - ) - } - ) - ) - plan = context.plan_builder("dev").build() - context.apply(plan) - - dev_environment = context.state_sync.get_environment("dev") - assert dev_environment is not None - - state_environments = context.state_reader.get_environments() - state_snapshots = context.state_reader.get_snapshots(context.snapshots.values()) - assert ( - len(state_snapshots) - == len(state_environments[0].snapshots) - == 
len(state_environments[1].snapshots) - ) - - # The state tables at this point should be able to be retrieved - state_tables = { - "_environments", - "_snapshots", - "_intervals", - "_auto_restatements", - "_environment_statements", - "_intervals", - "_versions", - } - for table_name in state_tables: - context.fetchdf(f"SELECT * FROM db_1.sqlmesh.{table_name}") - - # The actual tables as well - context.engine_adapters["second"].fetchdf(f"SELECT * FROM db_2.second_schema.model_one") - context.engine_adapters["second"].fetchdf(f"SELECT * FROM db_2.second_schema.model_two") - context.fetchdf(f"SELECT * FROM db_1.first_schema.model_one") - context.fetchdf(f"SELECT * FROM db_1.first_schema.model_two") - - # Use the destroy command to remove all data objects and state - # Mock the console confirmation to automatically return True - with patch.object(context.console, "_confirm", return_value=True): - context._destroy() - - # Ensure all tables have been removed - for table_name in state_tables: - with pytest.raises( - Exception, match=f"Catalog Error: Table with name {table_name} does not exist!" - ): - context.fetchdf(f"SELECT * FROM db_1.sqlmesh.{table_name}") - - # Validate tables have been deleted as well - with pytest.raises( - Exception, match=r"Catalog Error: Table with name model_two does not exist!" - ): - context.fetchdf("SELECT * FROM db_1.first_schema.model_two") - with pytest.raises( - Exception, match=r"Catalog Error: Table with name model_one does not exist!" - ): - context.fetchdf("SELECT * FROM db_1.first_schema.model_one") - - with pytest.raises( - Exception, match=r"Catalog Error: Table with name model_two does not exist!" - ): - context.engine_adapters["second"].fetchdf("SELECT * FROM db_2.second_schema.model_two") - with pytest.raises( - Exception, match=r"Catalog Error: Table with name model_one does not exist!" - ): - context.engine_adapters["second"].fetchdf("SELECT * FROM db_2.second_schema.model_one") - - # Ensure the cache has been removed - assert not cache_path.exists() - - -@use_terminal_console -def test_audits_running_on_metadata_changes(tmp_path: Path): - def setup_scenario(model_before: str, model_after: str): - models_dir = Path("models") - create_temp_file(tmp_path, models_dir / "test.sql", model_before) - - # Create first snapshot - context = Context(paths=tmp_path, config=Config()) - context.plan("prod", no_prompts=True, auto_apply=True) - - # Create second (metadata) snapshot - create_temp_file(tmp_path, models_dir / "test.sql", model_after) - context.load() - - with capture_output() as output: - with pytest.raises(PlanError): - context.plan("prod", no_prompts=True, auto_apply=True) - - assert 'Failed models\n\n "model"' in output.stdout - - return output - - # Ensure incorrect audits (bad data, incorrect definition, etc.) are evaluated immediately - output = setup_scenario( - "MODEL (name model); SELECT NULL AS col", - "MODEL (name model, audits (not_null(columns=[col]))); SELECT NULL AS col", - ) - assert "'not_null' audit error: 1 row failed" in output.stdout - - output = setup_scenario( - "MODEL (name model); SELECT NULL AS col", - "MODEL (name model, audits (not_null(columns=[this_col_does_not_exist]))); SELECT NULL AS col", - ) - assert ( - 'Binder Error: Referenced column "this_col_does_not_exist" not found in \nFROM clause!' 
- in output.stdout - ) - - -@pytest.mark.set_default_connection(disable=True) -def test_missing_connection_config(): - # This is testing the actual implementation of Config.get_connection - # To make writing tests easier, it's patched by the autouse fixture provide_sqlmesh_default_connection - # Case 1: No default_connection or gateways specified should raise a ConfigError - with pytest.raises(ConfigError): - ctx = Context(config=Config()) - - # Case 2: No connection specified in the gateway should raise a ConfigError - with pytest.raises(ConfigError): - ctx = Context(config=Config(gateways={"incorrect": GatewayConfig()})) - - # Case 3: Specifying a default_connection or connection in the gateway should work - ctx = Context(config=Config(default_connection=DuckDBConnectionConfig())) - ctx = Context( - config=Config(gateways={"default": GatewayConfig(connection=DuckDBConnectionConfig())}) - ) - - -@use_terminal_console -def test_render_path_instead_of_model(tmp_path: Path): - create_temp_file(tmp_path, Path("models/test.sql"), "MODEL (name test_model); SELECT 1 AS col") - ctx = Context(paths=tmp_path, config=Config()) - - # Case 1: Fail gracefully when the user is passing in a path instead of a model name - for test_model in ["models/test.sql", "models/test.py"]: - with pytest.raises( - SQLMeshError, - match="Resolving models by path is not supported, please pass in the model name instead.", - ): - ctx.render(test_model) - - # Case 2: Fail gracefully when the model name is not found - with pytest.raises(SQLMeshError, match="Cannot find model with name 'incorrect_model'"): - ctx.render("incorrect_model") - - # Case 3: Render the model successfully - assert ctx.render("test_model").sql() == 'SELECT 1 AS "col"' - - -@use_terminal_console -def test_plan_always_recreate_environment(tmp_path: Path): - def plan_with_output(ctx: Context, environment: str): - with patch.object(logger, "info") as mock_logger: - with capture_output() as output: - ctx.load() - ctx.plan(environment, no_prompts=True, auto_apply=True) - - # Facade logs info "Promoting environment {environment}" - assert mock_logger.call_args[0][1] == environment - - return output - - models_dir = tmp_path / "models" - - logger = logging.getLogger("sqlmesh.core.state_sync.db.facade") - - create_temp_file( - tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 1 AS col" - ) - - config = Config(plan=PlanConfig(always_recreate_environment=True)) - ctx = Context(paths=[tmp_path], config=config) - - # Case 1: Neither prod nor dev exists, so dev is initialized - output = plan_with_output(ctx, "dev") - - assert """`dev` environment will be initialized""" in output.stdout - - # Case 2: Prod does not exist, so dev is updated - create_temp_file( - tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 5 AS col" - ) - - output = plan_with_output(ctx, "dev") - assert "`dev` environment will be initialized" in output.stdout - - # Case 3: Prod is initialized, so plan comparisons moving forward should be against prod - output = plan_with_output(ctx, "prod") - assert "`prod` environment will be initialized" in output.stdout - - # Case 4: Dev is updated with a breaking change. 
Prod exists now so plan comparisons moving forward should be against prod - create_temp_file( - tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 10 AS col" - ) - ctx.load() - - plan = ctx.plan_builder("dev").build() - - assert ( - next(iter(plan.context_diff.snapshots.values())).change_category - == SnapshotChangeCategory.BREAKING - ) - - output = plan_with_output(ctx, "dev") - assert "New environment `dev` will be created from `prod`" in output.stdout - assert "Differences from the `prod` environment" in output.stdout - - # Case 5: Dev is updated with a metadata change, but comparison against prod shows both the previous and the current changes - # so it's still classified as a breaking change - create_temp_file( - tmp_path, - models_dir / "a.sql", - "MODEL (name test.a, kind FULL, owner 'test'); SELECT 10 AS col", - ) - ctx.load() - - plan = ctx.plan_builder("dev").build() - - assert ( - next(iter(plan.context_diff.snapshots.values())).change_category - == SnapshotChangeCategory.BREAKING - ) - - output = plan_with_output(ctx, "dev") - assert "New environment `dev` will be created from `prod`" in output.stdout - assert "Differences from the `prod` environment" in output.stdout - - stdout_rstrip = "\n".join([line.rstrip() for line in output.stdout.split("\n")]) - assert ( - """MODEL ( - name test.a, -+ owner test, - kind FULL - ) - SELECT -- 5 AS col -+ 10 AS col""" - in stdout_rstrip - ) - - # Case 6: Ensure that target environment and create_from environment are not the same - output = plan_with_output(ctx, "prod") - assert not "New environment `prod` will be created from `prod`" in output.stdout - - # Case 7: Check that we can still run Context::diff() against any environment - for environment in ["dev", "prod"]: - context_diff = ctx._context_diff(environment) - assert context_diff.environment == environment - - -@time_machine.travel("2020-01-01 00:00:00 UTC") -def test_scd_type_2_full_restatement_no_start_date(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Initial product catalog of 3 products - raw_products = d.parse(""" - MODEL ( - name memory.store.raw_products, - kind FULL - ); - - SELECT * FROM VALUES - (101, 'Laptop Pro', 1299.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP), - (102, 'Wireless Mouse', 49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP), - (103, 'Office Chair', 199.99, 'Furniture', '2020-01-01 00:00:00'::TIMESTAMP) - AS t(product_id, product_name, price, category, last_updated); - """) - - # SCD Type 2 model for product history tracking - product_history = d.parse(""" - MODEL ( - name memory.store.product_history, - kind SCD_TYPE_2_BY_TIME ( - unique_key product_id, - updated_at_name last_updated, - disable_restatement false - ), - owner catalog_team, - cron '0 */6 * * *', - grain product_id, - description 'Product catalog change history' - ); - - SELECT - product_id::INT AS product_id, - product_name::TEXT AS product_name, - price::DECIMAL(10,2) AS price, - category::TEXT AS category, - last_updated AS last_updated - FROM - memory.store.raw_products; - """) - - raw_products_model = load_sql_based_model(raw_products) - product_history_model = load_sql_based_model(product_history) - context.upsert_model(raw_products_model) - context.upsert_model(product_history_model) - - # Initial plan and apply - plan = context.plan_builder("prod", skip_tests=True).build() - context.apply(plan) - - query = "SELECT product_id, product_name, price, category, last_updated, 
valid_from, valid_to FROM memory.store.product_history ORDER BY product_id, valid_from" - initial_data = context.engine_adapter.fetchdf(query) - - # Validate initial state of 3 products, all active - assert len(initial_data) == 3 - assert initial_data["valid_to"].isna().all() - initial_product_names = set(initial_data["product_name"].tolist()) - assert initial_product_names == {"Laptop Pro", "Wireless Mouse", "Office Chair"} - - # Price update and category change - with time_machine.travel("2020-01-15 12:00:00 UTC"): - raw_products_v2 = d.parse(""" - MODEL ( - name memory.store.raw_products, - kind FULL - ); - - SELECT * FROM VALUES - (101, 'Laptop Pro', 1199.99, 'Electronics', '2020-01-15 00:00:00'::TIMESTAMP), - (102, 'Wireless Mouse', 49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP), - (103, 'Ergonomic Office Chair', 229.99, 'Office Furniture', '2020-01-15 00:00:00'::TIMESTAMP) - AS t(product_id, product_name, price, category, last_updated); - """) - raw_products_v2_model = load_sql_based_model(raw_products_v2) - context.upsert_model(raw_products_v2_model) - context.plan( - auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() - ) - context.run() - - data_after_first_change = context.engine_adapter.fetchdf(query) - - # Should have 5 records (2 original closed, 2 new active, 1 unchanged) - assert len(data_after_first_change) == 5 - - # Second change - with time_machine.travel("2020-02-01 10:00:00 UTC"): - raw_products_v3 = d.parse(""" - MODEL ( - name memory.store.raw_products, - kind FULL - ); - - SELECT * FROM VALUES - (101, 'Laptop Pro Max', 1399.99, 'Electronics', '2020-02-01 00:00:00'::TIMESTAMP), - (103, 'Ergonomic Office Chair', 229.99, 'Office Furniture', '2020-01-15 00:00:00'::TIMESTAMP), - (102, 'Wireless Mouse', 49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP) - AS t(product_id, product_name, price, category, last_updated); - """) - raw_products_v3_model = load_sql_based_model(raw_products_v3) - context.upsert_model(raw_products_v3_model) - context.plan( - auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() - ) - context.run() - data_after_second_change = context.engine_adapter.fetchdf(query) - assert len(data_after_second_change) == 6 - - # Store the current state before full restatement - data_before_full_restatement = data_after_second_change.copy() - - # Perform full restatement (no start date provided) - with time_machine.travel("2020-02-01 15:00:00 UTC"): - plan = context.plan_builder( - "prod", skip_tests=True, restate_models=["memory.store.product_history"] - ).build() - context.apply(plan) - data_after_full_restatement = context.engine_adapter.fetchdf(query) - assert len(data_after_full_restatement) == 3 - - # Check that all currently active products before restatement are still active after restatement - active_before = data_before_full_restatement[ - data_before_full_restatement["valid_to"].isna() - ] - active_after = data_after_full_restatement - assert set(active_before["product_id"]) == set(active_after["product_id"]) - - expected_products = { - 101: { - "product_name": "Laptop Pro Max", - "price": 1399.99, - "category": "Electronics", - "last_updated": "2020-02-01", - }, - 102: { - "product_name": "Wireless Mouse", - "price": 49.99, - "category": "Electronics", - "last_updated": "2020-01-01", - }, - 103: { - "product_name": "Ergonomic Office Chair", - "price": 229.99, - "category": "Office Furniture", - "last_updated": "2020-01-15", - }, - } - for _, row in 
data_after_full_restatement.iterrows(): - pid = row["product_id"] - assert pid in expected_products - expected = expected_products[pid] - assert row["product_name"] == expected["product_name"] - assert float(row["price"]) == expected["price"] - assert row["category"] == expected["category"] - - # valid_from should be the epoch, valid_to should be NaT - assert str(row["valid_from"]) == "1970-01-01 00:00:00" - assert pd.isna(row["valid_to"]) - - -def test_plan_evaluator_correlation_id(tmp_path: Path): - def _correlation_id_in_sqls(correlation_id: CorrelationId, mock_logger): - sqls = [call[0][0] for call in mock_logger.call_args_list] - return any(f"/* {correlation_id} */" in sql for sql in sqls) - - ctx = Context(paths=[tmp_path], config=Config()) - - # Case: Ensure that the correlation id (plan_id) is included in the SQL for each plan - for i in range(2): - create_temp_file( - tmp_path, - Path("models", "test.sql"), - f"MODEL (name test.a, kind FULL); SELECT {i} AS col", - ) - - with mock.patch("sqlmesh.core.engine_adapter.base.EngineAdapter._log_sql") as mock_logger: - ctx.load() - plan = ctx.plan(auto_apply=True, no_prompts=True) - - correlation_id = CorrelationId.from_plan_id(plan.plan_id) - assert str(correlation_id) == f"SQLMESH_PLAN: {plan.plan_id}" - - assert _correlation_id_in_sqls(correlation_id, mock_logger) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_scd_type_2_regular_run_with_offset(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - raw_employee_status = d.parse(""" - MODEL ( - name memory.hr_system.raw_employee_status, - kind FULL - ); - - SELECT - 1001 AS employee_id, - 'engineering' AS department, - 'EMEA' AS region, - '2023-01-08 15:00:00 UTC' AS last_modified; - """) - - employee_history = d.parse(""" - MODEL ( - name memory.hr_system.employee_history, - kind SCD_TYPE_2_BY_TIME ( - unique_key employee_id, - updated_at_name last_modified, - disable_restatement false - ), - owner hr_analytics, - cron '0 7 * * *', - grain employee_id, - description 'Historical tracking of employee status changes' - ); - - SELECT - employee_id::INT AS employee_id, - department::TEXT AS department, - region::TEXT AS region, - last_modified AS last_modified - FROM - memory.hr_system.raw_employee_status; - """) - - raw_employee_status_model = load_sql_based_model(raw_employee_status) - employee_history_model = load_sql_based_model(employee_history) - context.upsert_model(raw_employee_status_model) - context.upsert_model(employee_history_model) - - # Initial plan and apply - plan = context.plan_builder("prod", skip_tests=True).build() - context.apply(plan) - - query = "SELECT employee_id, department, region, valid_from, valid_to FROM memory.hr_system.employee_history ORDER BY employee_id, valid_from" - initial_data = context.engine_adapter.fetchdf(query) - - assert len(initial_data) == 1 - assert initial_data["valid_to"].isna().all() - assert initial_data["department"].tolist() == ["engineering"] - assert initial_data["region"].tolist() == ["EMEA"] - - # Apply a future plan with source changes a few hours before the cron time of the SCD Type 2 model BUT on the same day - with time_machine.travel("2023-01-09 00:10:00 UTC"): - raw_employee_status_v2 = d.parse(""" - MODEL ( - name memory.hr_system.raw_employee_status, - kind FULL - ); - - SELECT - 1001 AS employee_id, - 'engineering' AS department, - 'AMER' AS region, - '2023-01-09 00:10:00 UTC' AS last_modified; - """) - raw_employee_status_v2_model = 
load_sql_based_model(raw_employee_status_v2) - context.upsert_model(raw_employee_status_v2_model) - context.plan( - auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() - ) - - # The 7th hour of the day the run is kicked off for the SCD Type 2 model - with time_machine.travel("2023-01-09 07:00:01 UTC"): - context.run() - data_after_change = context.engine_adapter.fetchdf(query) - - # Validate the SCD2 records for employee 1001 - assert len(data_after_change) == 2 - assert data_after_change.iloc[0]["employee_id"] == 1001 - assert data_after_change.iloc[0]["department"] == "engineering" - assert data_after_change.iloc[0]["region"] == "EMEA" - assert str(data_after_change.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" - assert str(data_after_change.iloc[0]["valid_to"]) == "2023-01-09 00:10:00" - assert data_after_change.iloc[1]["employee_id"] == 1001 - assert data_after_change.iloc[1]["department"] == "engineering" - assert data_after_change.iloc[1]["region"] == "AMER" - assert str(data_after_change.iloc[1]["valid_from"]) == "2023-01-09 00:10:00" - assert pd.isna(data_after_change.iloc[1]["valid_to"]) - - # Update source model again a bit later on the same day - raw_employee_status_v2 = d.parse(""" - MODEL ( - name memory.hr_system.raw_employee_status, - kind FULL - ); - - SELECT - 1001 AS employee_id, - 'sales' AS department, - 'ANZ' AS region, - '2023-01-09 07:26:00 UTC' AS last_modified; - """) - raw_employee_status_v2_model = load_sql_based_model(raw_employee_status_v2) - context.upsert_model(raw_employee_status_v2_model) - context.plan( - auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() - ) - - # A day later the run is kicked off for the SCD Type 2 model again - with time_machine.travel("2023-01-10 07:00:00 UTC"): - context.run() - data_after_change = context.engine_adapter.fetchdf(query) - - # Validate the SCD2 history for employee 1001 after second change with the historical records intact - assert len(data_after_change) == 3 - assert data_after_change.iloc[0]["employee_id"] == 1001 - assert data_after_change.iloc[0]["department"] == "engineering" - assert data_after_change.iloc[0]["region"] == "EMEA" - assert str(data_after_change.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" - assert str(data_after_change.iloc[0]["valid_to"]) == "2023-01-09 00:10:00" - assert data_after_change.iloc[1]["employee_id"] == 1001 - assert data_after_change.iloc[1]["department"] == "engineering" - assert data_after_change.iloc[1]["region"] == "AMER" - assert str(data_after_change.iloc[1]["valid_from"]) == "2023-01-09 00:10:00" - assert str(data_after_change.iloc[1]["valid_to"]) == "2023-01-09 07:26:00" - assert data_after_change.iloc[2]["employee_id"] == 1001 - assert data_after_change.iloc[2]["department"] == "sales" - assert data_after_change.iloc[2]["region"] == "ANZ" - assert str(data_after_change.iloc[2]["valid_from"]) == "2023-01-09 07:26:00" - assert pd.isna(data_after_change.iloc[2]["valid_to"]) - - # Now test restatement works (full restatement support currently) - with time_machine.travel("2023-01-10 07:38:00 UTC"): - plan = context.plan_builder( - "prod", - skip_tests=True, - restate_models=["memory.hr_system.employee_history"], - start="2023-01-09 00:10:00", - ).build() - context.apply(plan) - restated_data = context.engine_adapter.fetchdf(query) - - # Validate the SCD2 history after restatement has been wiped bar one - assert len(restated_data) == 1 - assert restated_data.iloc[0]["employee_id"] == 1001 - assert 
restated_data.iloc[0]["department"] == "sales" - assert restated_data.iloc[0]["region"] == "ANZ" - assert str(restated_data.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" - assert pd.isna(restated_data.iloc[0]["valid_to"]) - - -def test_engine_adapters_multi_repo_all_gateways_gathered(copy_to_temp_path): - paths = copy_to_temp_path("examples/multi") - repo_1_path = paths[0] / "repo_1" - repo_2_path = paths[0] / "repo_2" - - # Add an extra gateway to repo_2's config - repo_2_config_path = repo_2_path / "config.yaml" - config_content = repo_2_config_path.read_text() - - modified_config = config_content.replace( - "default_gateway: local", - dedent(""" - extra: - connection: - type: duckdb - database: extra.duckdb - - default_gateway: local - """), - ) - - repo_2_config_path.write_text(modified_config) - - # Create context with both repos but using the repo_1 path first - context = Context( - paths=(repo_1_path, repo_2_path), - gateway="memory", - ) - - # Verify all gateways from both repos are present - gathered_gateways = context.engine_adapters.keys() - expected_gateways = {"local", "memory", "extra"} - assert gathered_gateways == expected_gateways - - -def test_physical_table_naming_strategy_table_only(copy_to_temp_path: t.Callable): - sushi_context = Context( - paths=copy_to_temp_path("examples/sushi"), - config="table_only_naming_config", - ) - - assert sushi_context.config.physical_table_naming_convention == TableNamingConvention.TABLE_ONLY - sushi_context.plan(auto_apply=True) - - adapter = sushi_context.engine_adapter - - snapshot_tables = [ - dict(catalog=str(r[0]), schema=str(r[1]), table=str(r[2])) - for r in adapter.fetchall( - "select table_catalog, table_schema, table_name from information_schema.tables where table_type='BASE TABLE'" - ) - ] - - assert all([not t["table"].startswith("sushi") for t in snapshot_tables]) - - prod_env = sushi_context.state_reader.get_environment("prod") - assert prod_env - - prod_env_snapshots = sushi_context.state_reader.get_snapshots(prod_env.snapshots) - - assert all( - s.table_naming_convention == TableNamingConvention.TABLE_ONLY - for s in prod_env_snapshots.values() - ) - - -def test_physical_table_naming_strategy_hash_md5(copy_to_temp_path: t.Callable): - sushi_context = Context( - paths=copy_to_temp_path("examples/sushi"), - config="hash_md5_naming_config", - ) - - assert sushi_context.config.physical_table_naming_convention == TableNamingConvention.HASH_MD5 - sushi_context.plan(auto_apply=True) - - adapter = sushi_context.engine_adapter - - snapshot_tables = [ - dict(catalog=str(r[0]), schema=str(r[1]), table=str(r[2])) - for r in adapter.fetchall( - "select table_catalog, table_schema, table_name from information_schema.tables where table_type='BASE TABLE'" - ) - ] - - assert all([not t["table"].startswith("sushi") for t in snapshot_tables]) - assert all([t["table"].startswith("sqlmesh_md5") for t in snapshot_tables]) - - prod_env = sushi_context.state_reader.get_environment("prod") - assert prod_env - - prod_env_snapshots = sushi_context.state_reader.get_snapshots(prod_env.snapshots) - - assert all( - s.table_naming_convention == TableNamingConvention.HASH_MD5 - for s in prod_env_snapshots.values() - ) - - -@pytest.mark.slow -def test_default_audits_applied_in_plan(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir(exist_ok=True) - - # Create a model with data that will pass the audits - create_temp_file( - tmp_path, - models_dir / "orders.sql", - dedent(""" - MODEL ( - name test.orders, - kind FULL - ); - - SELECT - 
1 AS order_id, - 'customer_1' AS customer_id, - 100.50 AS amount, - '2024-01-01'::DATE AS order_date - UNION ALL - SELECT - 2 AS order_id, - 'customer_2' AS customer_id, - 200.75 AS amount, - '2024-01-02'::DATE AS order_date - """), - ) - - config = Config( - model_defaults=ModelDefaultsConfig( - dialect="duckdb", - audits=[ - "not_null(columns := [order_id, customer_id])", - "unique_values(columns := [order_id])", - ], - ) - ) - - context = Context(paths=tmp_path, config=config) - - # Create and apply plan, here audits should pass - plan = context.plan("prod", no_prompts=True) - context.apply(plan) - - # Verify model has the default audits - model = context.get_model("test.orders") - assert len(model.audits) == 2 - - audit_names = [audit[0] for audit in model.audits] - assert "not_null" in audit_names - assert "unique_values" in audit_names - - # Verify audit arguments are preserved - for audit_name, audit_args in model.audits: - if audit_name == "not_null": - assert "columns" in audit_args - columns = [col.name for col in audit_args["columns"].expressions] - assert "order_id" in columns - assert "customer_id" in columns - elif audit_name == "unique_values": - assert "columns" in audit_args - columns = [col.name for col in audit_args["columns"].expressions] - assert "order_id" in columns - - -@pytest.mark.slow -def test_default_audits_fail_on_bad_data(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir(exist_ok=True) - - # Create a model with data that violates NOT NULL constraint - create_temp_file( - tmp_path, - models_dir / "bad_orders.sql", - dedent(""" - MODEL ( - name test.bad_orders, - kind FULL - ); - - SELECT - 1 AS order_id, - NULL AS customer_id, -- This violates NOT NULL - 100.50 AS amount, - '2024-01-01'::DATE AS order_date - UNION ALL - SELECT - 2 AS order_id, - 'customer_2' AS customer_id, - 200.75 AS amount, - '2024-01-02'::DATE AS order_date - """), - ) - - config = Config( - model_defaults=ModelDefaultsConfig( - dialect="duckdb", audits=["not_null(columns := [customer_id])"] - ) - ) - - context = Context(paths=tmp_path, config=config) - - # Plan should fail due to audit failure - with pytest.raises(PlanError): - context.plan("prod", no_prompts=True, auto_apply=True) - - -@pytest.mark.slow -def test_default_audits_with_model_specific_audits(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir(exist_ok=True) - audits_dir = tmp_path / "audits" - audits_dir.mkdir(exist_ok=True) - - create_temp_file( - tmp_path, - audits_dir / "range_check.sql", - dedent(""" - AUDIT ( - name range_check - ); - - SELECT * FROM @this_model - WHERE @column < @min_value OR @column > @max_value - """), - ) - - # Create a model with its own audits in addition to defaults - create_temp_file( - tmp_path, - models_dir / "products.sql", - dedent(""" - MODEL ( - name test.products, - kind FULL, - audits ( - range_check(column := price, min_value := 0, max_value := 10000) - ) - ); - - SELECT - 1 AS product_id, - 'Widget' AS product_name, - 99.99 AS price - UNION ALL - SELECT - 2 AS product_id, - 'Gadget' AS product_name, - 149.99 AS price - """), - ) - - config = Config( - model_defaults=ModelDefaultsConfig( - dialect="duckdb", - audits=[ - "not_null(columns := [product_id, product_name])", - "unique_values(columns := [product_id])", - ], - ) - ) - - context = Context(paths=tmp_path, config=config) - - # Create and apply plan - plan = context.plan("prod", no_prompts=True) - context.apply(plan) - - # Verify model has both default and model-specific audits - model = 
context.get_model("test.products") - assert len(model.audits) == 3 - - audit_names = [audit[0] for audit in model.audits] - assert "not_null" in audit_names - assert "unique_values" in audit_names - assert "range_check" in audit_names - - # Verify audit execution order, default audits first then model-specific - assert model.audits[0][0] == "not_null" - assert model.audits[1][0] == "unique_values" - assert model.audits[2][0] == "range_check" - - -@pytest.mark.slow -def test_default_audits_with_custom_audit_definitions(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir(exist_ok=True) - audits_dir = tmp_path / "audits" - audits_dir.mkdir(exist_ok=True) - - # Create custom audit definition - create_temp_file( - tmp_path, - audits_dir / "positive_amount.sql", - dedent(""" - AUDIT ( - name positive_amount - ); - - SELECT * FROM @this_model - WHERE @column <= 0 - """), - ) - - # Create a model - create_temp_file( - tmp_path, - models_dir / "transactions.sql", - dedent(""" - MODEL ( - name test.transactions, - kind FULL - ); - - SELECT - 1 AS transaction_id, - 'TXN001' AS transaction_code, - 250.00 AS amount, - '2024-01-01'::DATE AS transaction_date - UNION ALL - SELECT - 2 AS transaction_id, - 'TXN002' AS transaction_code, - 150.00 AS amount, - '2024-01-02'::DATE AS transaction_date - """), - ) - - config = Config( - model_defaults=ModelDefaultsConfig( - dialect="duckdb", - audits=[ - "not_null(columns := [transaction_id, transaction_code])", - "unique_values(columns := [transaction_id])", - "positive_amount(column := amount)", - ], - ) - ) - - context = Context(paths=tmp_path, config=config) - - # Create and apply plan - plan = context.plan("prod", no_prompts=True) - context.apply(plan) - - # Verify model has all default audits including custom - model = context.get_model("test.transactions") - assert len(model.audits) == 3 - - audit_names = [audit[0] for audit in model.audits] - assert "not_null" in audit_names - assert "unique_values" in audit_names - assert "positive_amount" in audit_names - - # Verify custom audit arguments - for audit_name, audit_args in model.audits: - if audit_name == "positive_amount": - assert "column" in audit_args - assert audit_args["column"].name == "amount" - - -def test_incremental_by_time_model_ignore_destructive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM 
"default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 2 as id, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - assert updated_df["new_column"].dropna().tolist() == [3] - - with time_machine.travel("2023-01-11 00:00:00 UTC"): - updated_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 2 as id, - CAST(4 AS STRING) as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(updated_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True, run=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 3 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - # The destructive change was ignored but this change is coercable and therefore we still return ints - assert updated_df["new_column"].dropna().tolist() == [3, 4] - - with time_machine.travel("2023-01-12 00:00:00 UTC"): - updated_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 2 as id, - CAST(5 AS STRING) as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / 
"test_model.sql").write_text(updated_model) - - context = Context(paths=[tmp_path], config=config) - # Make the change compatible since that means we will attempt and alter now that is considered additive - context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = { - exp.DataType.build("INT"): {exp.DataType.build("STRING")} - } - context.plan("prod", auto_apply=True, no_prompts=True, run=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 4 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - # The change is now reflected since an additive alter could be performed - assert updated_df["new_column"].dropna().tolist() == ["3", "4", "5"] - - context.close() - - -def test_incremental_by_time_model_ignore_additive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - 'other' as other_column, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column to the source table - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'other' as other_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("ALTER TABLE source_table ADD COLUMN new_column INT") - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 
- assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is removed since destructive is allowed - assert "name" not in updated_df.columns - # new_column is not added since additive is ignored - assert "new_column" not in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was applied - assert "name" not in updated_df.columns - # new_column is still not added since additive is ignored - assert "new_column" not in updated_df.columns - - with time_machine.travel("2023-01-11 00:00:00 UTC"): - updated_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - CAST(1 AS STRING) as id, - 'other' as other_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(updated_model) - - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = { - exp.DataType.build("INT"): {exp.DataType.build("STRING")} - } - context.plan("prod", auto_apply=True, no_prompts=True, run=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 3 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is still not added since additive is ignored - assert "new_column" not in updated_df.columns - # The additive change was ignored since we set the change as compatible therefore - # instead of getting strings in the result we still return ints - assert updated_df["id"].tolist() == [1, 1, 1] - - with time_machine.travel("2023-01-12 00:00:00 UTC"): - updated_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change allow - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - CAST(1 AS STRING) as id, - 'other' as other_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(updated_model) - - context = Context(paths=[tmp_path], config=config) - # Make the change compatible since that means we will attempt and alter now that is considered additive - context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = { - exp.DataType.build("INT"): {exp.DataType.build("STRING")} - } - context.plan("prod", auto_apply=True, no_prompts=True, run=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 4 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since 
-        assert "name" not in updated_df.columns
-        # new_column is now added since it is additive is now allowed
-        assert "new_column" in updated_df.columns
-        # The change is now reflected since an additive alter could be performed
-        assert updated_df["id"].dropna().tolist() == ["1", "1", "1", "1"]
-
-        context.close()
-
-
-def test_incremental_by_unique_key_model_ignore_destructive_change(tmp_path: Path):
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-    data_dir = tmp_path / "data"
-    data_dir.mkdir()
-    data_filepath = data_dir / "test.duckdb"
-
-    config = Config(
-        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
-        default_connection=DuckDBConnectionConfig(database=str(data_filepath)),
-    )
-
-    # Initial model with 3 columns
-    initial_model = f"""
-    MODEL (
-        name test_model,
-        kind INCREMENTAL_BY_UNIQUE_KEY (
-            unique_key id,
-            forward_only true,
-            on_destructive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        1 as id,
-        'test_name' as name,
-        @start_ds as ds
-    FROM
-        source_table;
-    """
-
-    # Write initial model
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    with time_machine.travel("2023-01-08 00:00:00 UTC"):
-        # Create context and apply initial model
-        context = Context(paths=[tmp_path], config=config)
-        context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)")
-        context.engine_adapter.execute("INSERT INTO source_table VALUES (1)")
-
-        # Apply initial plan and load data
-        context.plan("prod", auto_apply=True, no_prompts=True)
-
-        # Verify initial data was loaded
-        initial_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(initial_df) == 1
-        assert "source_id" in initial_df.columns
-        assert "id" in initial_df.columns
-        assert "name" in initial_df.columns
-        assert "ds" in initial_df.columns
-
-        context.close()
-
-    # remove `name` column and add new column
-    initial_model = """
-    MODEL (
-        name test_model,
-        kind INCREMENTAL_BY_UNIQUE_KEY (
-            unique_key id,
-            forward_only true,
-            on_destructive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        2 as id,
-        3 as new_column,
-        @start_ds as ds
-    FROM
-        source_table;
-    """
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    context = Context(paths=[tmp_path], config=config)
-    context.plan("prod", auto_apply=True, no_prompts=True)
-
-    # Verify data loading continued to work
-    # The existing data should still be there and new data should be loaded
-    updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-
-    assert len(updated_df) == 1
-    assert "source_id" in initial_df.columns
-    assert "id" in updated_df.columns
-    assert "ds" in updated_df.columns
-    # name is still in table since destructive was ignored
-    assert "name" in updated_df.columns
-    # new_column is added since it is additive and allowed
-    assert "new_column" in updated_df.columns
-
-    context.close()
-
-    with time_machine.travel("2023-01-10 00:00:00 UTC"):
-        context = Context(paths=[tmp_path], config=config)
-        context.run()
-        updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(updated_df) == 2
-        assert "source_id" in initial_df.columns
-        assert "id" in updated_df.columns
-        assert "ds" in updated_df.columns
-        # name is still in table since destructive was ignored
-        assert "name" in updated_df.columns
-        # new_column is added since it is additive and allowed
-        assert "new_column" in updated_df.columns
-
-        context.close()
-
-
-def test_incremental_by_unique_key_model_ignore_additive_change(tmp_path: Path):
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-    data_dir = tmp_path / "data"
-    data_dir.mkdir()
-    data_filepath = data_dir / "test.duckdb"
-
-    config = Config(
-        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
-        default_connection=DuckDBConnectionConfig(database=str(data_filepath)),
-    )
-
-    # Initial model with 3 columns
-    initial_model = f"""
-    MODEL (
-        name test_model,
-        kind INCREMENTAL_BY_UNIQUE_KEY (
-            unique_key id,
-            forward_only true,
-            on_destructive_change allow,
-            on_additive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        1 as id,
-        'test_name' as name,
-        @start_ds as ds
-    FROM
-        source_table;
-    """
-
-    # Write initial model
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    with time_machine.travel("2023-01-08 00:00:00 UTC"):
-        # Create context and apply initial model
-        context = Context(paths=[tmp_path], config=config)
-        context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)")
-        context.engine_adapter.execute("INSERT INTO source_table VALUES (1)")
-
-        # Apply initial plan and load data
-        context.plan("prod", auto_apply=True, no_prompts=True)
-
-        # Verify initial data was loaded
-        initial_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(initial_df) == 1
-        assert "source_id" in initial_df.columns
-        assert "id" in initial_df.columns
-        assert "name" in initial_df.columns
-        assert "ds" in initial_df.columns
-
-        context.close()
-
-    # remove `name` column and add new column
-    initial_model = """
-    MODEL (
-        name test_model,
-        kind INCREMENTAL_BY_UNIQUE_KEY (
-            unique_key id,
-            forward_only true,
-            on_destructive_change allow,
-            on_additive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        2 as id,
-        3 as new_column,
-        @start_ds as ds
-    FROM
-        source_table;
-    """
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    context = Context(paths=[tmp_path], config=config)
-    context.plan("prod", auto_apply=True, no_prompts=True)
-
-    # Verify data loading continued to work
-    # The existing data should still be there and new data should be loaded
-    updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-
-    assert len(updated_df) == 1
-    assert "source_id" in initial_df.columns
-    assert "id" in updated_df.columns
-    assert "ds" in updated_df.columns
-    # name is not in table since destructive was allowed
-    assert "name" not in updated_df.columns
-    # new_column is not added since it is additive and ignored
-    assert "new_column" not in updated_df.columns
-
-    context.close()
-
-    with time_machine.travel("2023-01-10 00:00:00 UTC"):
-        context = Context(paths=[tmp_path], config=config)
-        context.run()
-        updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(updated_df) == 2
-        assert "source_id" in initial_df.columns
-        assert "id" in updated_df.columns
-        assert "ds" in updated_df.columns
-        # name is still not in table since destructive was allowed
-        assert "name" not in updated_df.columns
-        # new_column is not added since it is additive and ignored
-        assert "new_column" not in updated_df.columns
-
-        context.close()
-
-
-def test_incremental_unmanaged_model_ignore_destructive_change(tmp_path: Path):
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-    data_dir = tmp_path / "data"
-    data_dir.mkdir()
-    data_filepath = data_dir / "test.duckdb"
-
-    config = Config(
-        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
-        default_connection=DuckDBConnectionConfig(database=str(data_filepath)),
-    )
-
-    # Initial model with 3 columns
-    initial_model = f"""
-    MODEL (
-        name test_model,
-        kind INCREMENTAL_UNMANAGED(
-            on_destructive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        1 as id,
-        'test_name' as name,
-        @start_ds as ds
-    FROM
-        source_table;
-    """
-
-    # Write initial model
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    with time_machine.travel("2023-01-08 00:00:00 UTC"):
-        # Create context and apply initial model
-        context = Context(paths=[tmp_path], config=config)
-        context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)")
-        context.engine_adapter.execute("INSERT INTO source_table VALUES (1)")
-
-        # Apply initial plan and load data
-        context.plan("prod", auto_apply=True, no_prompts=True)
-
-        # Verify initial data was loaded
-        initial_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(initial_df) == 1
-        assert "source_id" in initial_df.columns
-        assert "id" in initial_df.columns
-        assert "name" in initial_df.columns
-        assert "ds" in initial_df.columns
-
-        context.close()
-
-    # remove `name` column and add new column
-    initial_model = """
-    MODEL (
-        name test_model,
-        kind INCREMENTAL_UNMANAGED(
-            on_destructive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        2 as id,
-        3 as new_column,
-        @start_ds as ds
-    FROM
-        source_table;
-    """
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    context = Context(paths=[tmp_path], config=config)
-    context.plan("prod", auto_apply=True, no_prompts=True)
-
-    # Verify data loading continued to work
-    # The existing data should still be there and new data should be loaded
-    updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-
-    assert len(updated_df) == 1
-    assert "source_id" in initial_df.columns
-    assert "id" in updated_df.columns
-    assert "ds" in updated_df.columns
-    # name is still in table since destructive was ignored
-    assert "name" in updated_df.columns
-    # new_column is added since it is additive and allowed
-    assert "new_column" in updated_df.columns
-
-    context.close()
-
-    with time_machine.travel("2023-01-10 00:00:00 UTC"):
-        context = Context(paths=[tmp_path], config=config)
-        context.run()
-        updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(updated_df) == 2
-        assert "source_id" in initial_df.columns
-        assert "id" in updated_df.columns
-        assert "ds" in updated_df.columns
-        # name is still in table since destructive was ignored
-        assert "name" in updated_df.columns
-        # new_column is added since it is additive and allowed
-        assert "new_column" in updated_df.columns
-
-        context.close()
-
-
-def test_incremental_unmanaged_model_ignore_additive_change(tmp_path: Path):
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-    data_dir = tmp_path / "data"
-    data_dir.mkdir()
-    data_filepath = data_dir / "test.duckdb"
-
-    config = Config(
-        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
-        default_connection=DuckDBConnectionConfig(database=str(data_filepath)),
-    )
-
-    # Initial model with 3 columns
-    initial_model = f"""
-    MODEL (
-        name test_model,
-        kind INCREMENTAL_UNMANAGED(
-            on_destructive_change allow,
-            on_additive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        1 as id,
-        'test_name' as name,
-        @start_ds as ds
-    FROM
-        source_table;
-    """
-
-    # Write initial model
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    with time_machine.travel("2023-01-08 00:00:00 UTC"):
time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_UNMANAGED( - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 2 as id, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - -def test_scd_type_2_by_time_ignore_destructive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind SCD_TYPE_2_BY_TIME ( - unique_key id, - updated_at_name ds, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_dt as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial 
-        initial_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(initial_df) == 1
-        assert "source_id" in initial_df.columns
-        assert "id" in initial_df.columns
-        assert "name" in initial_df.columns
-        assert "ds" in initial_df.columns
-
-        context.close()
-
-    # remove `name` column and add new column
-    initial_model = """
-    MODEL (
-        name test_model,
-        kind SCD_TYPE_2_BY_TIME (
-            unique_key id,
-            updated_at_name ds,
-            on_destructive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        1 as id,
-        3 as new_column,
-        @start_dt as ds
-    FROM
-        source_table;
-    """
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    context = Context(paths=[tmp_path], config=config)
-    context.plan("prod", auto_apply=True, no_prompts=True)
-
-    # Verify data loading continued to work
-    # The existing data should still be there and new data should be loaded
-    updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-
-    assert len(updated_df) == 1
-    assert "source_id" in initial_df.columns
-    assert "id" in updated_df.columns
-    assert "ds" in updated_df.columns
-    # name is still in table since destructive was ignored
-    assert "name" in updated_df.columns
-    # new_column is added since it is additive and allowed
-    assert "new_column" in updated_df.columns
-
-    context.close()
-
-    with time_machine.travel("2023-01-10 00:00:00 UTC"):
-        context = Context(paths=[tmp_path], config=config)
-        context.run()
-        updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(updated_df) == 2
-        assert "source_id" in initial_df.columns
-        assert "id" in updated_df.columns
-        assert "ds" in updated_df.columns
-        # name is still in table since destructive was ignored
-        assert "name" in updated_df.columns
-        # new_column is added since it is additive and allowed
-        assert "new_column" in updated_df.columns
-
-        context.close()
-
-
-def test_scd_type_2_by_time_ignore_additive_change(tmp_path: Path):
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-    data_dir = tmp_path / "data"
-    data_dir.mkdir()
-    data_filepath = data_dir / "test.duckdb"
-
-    config = Config(
-        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
-        default_connection=DuckDBConnectionConfig(database=str(data_filepath)),
-    )
-
-    # Initial model with 3 columns
-    initial_model = f"""
-    MODEL (
-        name test_model,
-        kind SCD_TYPE_2_BY_TIME (
-            unique_key id,
-            updated_at_name ds,
-            on_destructive_change allow,
-            on_additive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        1 as id,
-        'test_name' as name,
-        @start_dt as ds
-    FROM
-        source_table;
-    """
-
-    # Write initial model
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    with time_machine.travel("2023-01-08 00:00:00 UTC"):
-        # Create context and apply initial model
-        context = Context(paths=[tmp_path], config=config)
-        context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)")
-        context.engine_adapter.execute("INSERT INTO source_table VALUES (1)")
-
-        # Apply initial plan and load data
-        context.plan("prod", auto_apply=True, no_prompts=True)
-
-        # Verify initial data was loaded
-        initial_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(initial_df) == 1
-        assert "source_id" in initial_df.columns
-        assert "id" in initial_df.columns
-        assert "name" in initial_df.columns
-        assert "ds" in initial_df.columns
-
-        context.close()
-
-    # remove `name` column and add new column
-    initial_model = """
-    MODEL (
-        name test_model,
-        kind SCD_TYPE_2_BY_TIME (
-            unique_key id,
-            updated_at_name ds,
-            on_destructive_change allow,
-            on_additive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        1 as id,
-        3 as new_column,
-        @start_dt as ds
-    FROM
-        source_table;
-    """
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    context = Context(paths=[tmp_path], config=config)
-    context.plan("prod", auto_apply=True, no_prompts=True)
-
-    # Verify data loading continued to work
-    # The existing data should still be there and new data should be loaded
-    updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-
-    assert len(updated_df) == 1
-    assert "source_id" in initial_df.columns
-    assert "id" in updated_df.columns
-    assert "ds" in updated_df.columns
-    # name is not still in table since destructive was allowed
-    assert "name" not in updated_df.columns
-    # new_column is not added since it is additive and ignored
-    assert "new_column" not in updated_df.columns
-
-    context.close()
-
-    with time_machine.travel("2023-01-10 00:00:00 UTC"):
-        context = Context(paths=[tmp_path], config=config)
-        context.run()
-        updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(updated_df) == 2
-        assert "source_id" in initial_df.columns
-        assert "id" in updated_df.columns
-        assert "ds" in updated_df.columns
-        # name is not still in table since destructive was allowed
-        assert "name" not in updated_df.columns
-        # new_column is not added since it is additive and ignored
-        assert "new_column" not in updated_df.columns
-
-        context.close()
-
-
-def test_scd_type_2_by_column_ignore_destructive_change(tmp_path: Path):
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-    data_dir = tmp_path / "data"
-    data_dir.mkdir()
-    data_filepath = data_dir / "test.duckdb"
-
-    config = Config(
-        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
-        default_connection=DuckDBConnectionConfig(database=str(data_filepath)),
-    )
-
-    # Initial model with 3 columns
-    initial_model = f"""
-    MODEL (
-        name test_model,
-        kind SCD_TYPE_2_BY_COLUMN (
-            unique_key id,
-            columns [name],
-            on_destructive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        1 as id,
-        'test_name' as name,
-        @start_ds as ds
-    FROM
-        source_table;
-    """
-
-    # Write initial model
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    with time_machine.travel("2023-01-08 00:00:00 UTC"):
-        # Create context and apply initial model
-        context = Context(paths=[tmp_path], config=config)
-        context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)")
-        context.engine_adapter.execute("INSERT INTO source_table VALUES (1)")
-
-        # Apply initial plan and load data
-        context.plan("prod", auto_apply=True, no_prompts=True)
-
-        # Verify initial data was loaded
-        initial_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(initial_df) == 1
-        assert "source_id" in initial_df.columns
-        assert "id" in initial_df.columns
-        assert "name" in initial_df.columns
-        assert "ds" in initial_df.columns
-
-        context.close()
-
-    # remove `name` column and add new column
-    initial_model = """
-    MODEL (
-        name test_model,
-        kind SCD_TYPE_2_BY_COLUMN (
-            unique_key id,
-            columns [new_column],
-            on_destructive_change ignore
-        ),
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        1 as id,
-        3 as new_column,
-        @start_ds as ds
-    FROM
-        source_table;
-    """
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    context = Context(paths=[tmp_path], config=config)
context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - -def test_scd_type_2_by_column_ignore_additive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind SCD_TYPE_2_BY_COLUMN ( - unique_key id, - columns [stable], - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - 'stable' as stable, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind SCD_TYPE_2_BY_COLUMN ( - unique_key id, - columns [stable], - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'stable2' as stable, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert 
"id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was ignored - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - -def test_incremental_partition_ignore_destructive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_PARTITION ( - on_destructive_change ignore - ), - partitioned_by [ds], - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_PARTITION ( - on_destructive_change ignore - ), - partitioned_by [ds], - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - 
-        updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(updated_df) == 2
-        assert "source_id" in initial_df.columns
-        assert "id" in updated_df.columns
-        assert "ds" in updated_df.columns
-        # name is still in table since destructive was ignored
-        assert "name" in updated_df.columns
-        # new_column is added since it is additive and allowed
-        assert "new_column" in updated_df.columns
-
-        context.close()
-
-
-def test_incremental_partition_ignore_additive_change(tmp_path: Path):
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-    data_dir = tmp_path / "data"
-    data_dir.mkdir()
-    data_filepath = data_dir / "test.duckdb"
-
-    config = Config(
-        model_defaults=ModelDefaultsConfig(dialect="duckdb"),
-        default_connection=DuckDBConnectionConfig(database=str(data_filepath)),
-    )
-
-    # Initial model with 3 columns
-    initial_model = f"""
-    MODEL (
-        name test_model,
-        kind INCREMENTAL_BY_PARTITION (
-            on_destructive_change allow,
-            on_additive_change ignore
-        ),
-        partitioned_by [ds],
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        1 as id,
-        'test_name' as name,
-        @start_ds as ds
-    FROM
-        source_table;
-    """
-
-    # Write initial model
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    with time_machine.travel("2023-01-08 00:00:00 UTC"):
-        # Create context and apply initial model
-        context = Context(paths=[tmp_path], config=config)
-        context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)")
-        context.engine_adapter.execute("INSERT INTO source_table VALUES (1)")
-
-        # Apply initial plan and load data
-        context.plan("prod", auto_apply=True, no_prompts=True)
-
-        # Verify initial data was loaded
-        initial_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(initial_df) == 1
-        assert "source_id" in initial_df.columns
-        assert "id" in initial_df.columns
-        assert "name" in initial_df.columns
-        assert "ds" in initial_df.columns
-
-        context.close()
-
-    # remove `name` column and add new column
-    initial_model = """
-    MODEL (
-        name test_model,
-        kind INCREMENTAL_BY_PARTITION (
-            on_destructive_change allow,
-            on_additive_change ignore
-        ),
-        partitioned_by [ds],
-        start '2023-01-01',
-        cron '@daily'
-    );
-
-    SELECT
-        *,
-        1 as id,
-        3 as new_column,
-        @start_ds as ds
-    FROM
-        source_table;
-    """
-    (models_dir / "test_model.sql").write_text(initial_model)
-
-    context = Context(paths=[tmp_path], config=config)
-    context.plan("prod", auto_apply=True, no_prompts=True)
-
-    # Verify data loading continued to work
-    # The existing data should still be there and new data should be loaded
-    updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-
-    assert len(updated_df) == 1
-    assert "source_id" in initial_df.columns
-    assert "id" in updated_df.columns
-    assert "ds" in updated_df.columns
-    # name is not still in table since destructive was allowed
-    assert "name" not in updated_df.columns
-    # new_column is not added since it is additive and ignored
-    assert "new_column" not in updated_df.columns
-
-    context.close()
-
-    with time_machine.travel("2023-01-10 00:00:00 UTC"):
-        context = Context(paths=[tmp_path], config=config)
-        context.run()
-        updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(updated_df) == 2
-        assert "source_id" in initial_df.columns
-        assert "id" in updated_df.columns
-        assert "ds" in updated_df.columns
-        # name is not still in table since destructive was allowed
-        assert "name" not in updated_df.columns
-        # new_column is not added since it is additive and ignored
- assert "new_column" not in updated_df.columns - - context.close() - - -def test_incremental_by_time_model_ignore_destructive_change_unit_test(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - test_dir = tmp_path / "tests" - test_dir.mkdir() - test_filepath = test_dir / "test_test_model.yaml" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - id, - name, - ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - initial_test = f""" - -test_test_model: - model: test_model - inputs: - source_table: - - id: 1 - name: 'test_name' - ds: '2025-01-01' - outputs: - query: - - id: 1 - name: 'test_name' - ds: '2025-01-01' -""" - - # Write initial test - test_filepath.write_text(initial_test) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute( - "CREATE TABLE source_table (id INT, name STRING, new_column INT, ds STRING)" - ) - context.engine_adapter.execute( - "INSERT INTO source_table VALUES (1, 'test_name', NULL, '2023-01-01')" - ) - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - test_result = context.test() - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - assert len(test_result.successes) == 1 - assert test_result.testsRun == len(test_result.successes) - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - id, - new_column, - ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - updated_test = f""" - - test_test_model: - model: test_model - inputs: - source_table: - - id: 1 - new_column: 3 - ds: '2025-01-01' - outputs: - query: - - id: 1 - new_column: 3 - ds: '2025-01-01' - """ - - # Write initial test - test_filepath.write_text(updated_test) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - test_result = context.test() - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 1 - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - assert len(test_result.successes) == 1 - assert test_result.testsRun == len(test_result.successes) - - context.close() - - with 
time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("INSERT INTO source_table VALUES (2, NULL, 3, '2023-01-09')") - context.run() - test_result = context.test() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - assert len(test_result.successes) == 1 - assert test_result.testsRun == len(test_result.successes) - - context.close() - - -def test_incremental_by_time_model_ignore_additive_change_unit_test(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - test_dir = tmp_path / "tests" - test_dir.mkdir() - test_filepath = test_dir / "test_test_model.yaml" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - id, - name, - ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - initial_test = f""" - -test_test_model: - model: test_model - inputs: - source_table: - - id: 1 - name: 'test_name' - ds: '2025-01-01' - outputs: - query: - - id: 1 - name: 'test_name' - ds: '2025-01-01' -""" - - # Write initial test - test_filepath.write_text(initial_test) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute( - "CREATE TABLE source_table (id INT, name STRING, new_column INT, ds STRING)" - ) - context.engine_adapter.execute( - "INSERT INTO source_table VALUES (1, 'test_name', NULL, '2023-01-01')" - ) - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - test_result = context.test() - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - assert len(test_result.successes) == 1 - assert test_result.testsRun == len(test_result.successes) - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - id, - new_column, - ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - # `new_column` is in the output since unit tests are based on the model definition that currently - # exists and doesn't take into account the historical changes to the table. 
-    # not actually in the table but it is represented in the test
-    updated_test = f"""
-    test_test_model:
-      model: test_model
-      inputs:
-        source_table:
-          - id: 1
-            new_column: 3
-            ds: '2025-01-01'
-      outputs:
-        query:
-          - id: 1
-            new_column: 3
-            ds: '2025-01-01'
-    """
-
-    # Write initial test
-    test_filepath.write_text(updated_test)
-
-    context = Context(paths=[tmp_path], config=config)
-    context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True)
-    test_result = context.test()
-
-    # Verify data loading continued to work
-    # The existing data should still be there and new data should be loaded
-    updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-    assert len(updated_df) == 1
-    assert "id" in updated_df.columns
-    assert "ds" in updated_df.columns
-    # name is not in table since destructive was ignored
-    assert "name" not in updated_df.columns
-    # new_column is not added since it is additive and ignored
-    assert "new_column" not in updated_df.columns
-    assert len(test_result.successes) == 1
-    assert test_result.testsRun == len(test_result.successes)
-
-    context.close()
-
-    with time_machine.travel("2023-01-10 00:00:00 UTC"):
-        context = Context(paths=[tmp_path], config=config)
-        context.engine_adapter.execute("INSERT INTO source_table VALUES (2, NULL, 3, '2023-01-09')")
-        context.run()
-        test_result = context.test()
-        updated_df = context.fetchdf('SELECT * FROM "default"."test_model"')
-        assert len(updated_df) == 2
-        assert "id" in updated_df.columns
-        assert "ds" in updated_df.columns
-        # name is still not in table since destructive was allowed
-        assert "name" not in updated_df.columns
-        # new_column is not added since it is additive and ignored
-        assert "new_column" not in updated_df.columns
-        assert len(test_result.successes) == 1
-        assert test_result.testsRun == len(test_result.successes)
-
-        context.close()
-
-
-def test_restatement_plan_interval_external_visibility(tmp_path: Path):
-    """
-    Scenario:
-    - `prod` environment exists, models A <- B
-    - `dev` environment created, models A <- B(dev) <- C (dev)
-    - Restatement plan is triggered against `prod` for model A
-    - During restatement, a new dev environment `dev_2` is created with a new version of B(dev_2)
-
-    Outcome:
-    - At no point are the prod_intervals considered "missing" from state for A
-    - The intervals for B(dev) and C(dev) are cleared
-    - The intervals for B(dev_2) are also cleared even though the environment didnt exist at the time the plan was started,
-      because they are based on the data from a partially restated version of A
-    """
-
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-
-    lock_file_path = tmp_path / "test.lock"  # python model blocks while this file is present
-
-    evaluation_lock_file_path = (
-        tmp_path / "evaluation.lock"
-    )  # python model creates this file if it's in the wait loop and deletes it once done
-
-    # Note: to make execution block so we can test stuff, we use a Python model that blocks until it no longer detects the presence of a file
-    (models_dir / "model_a.py").write_text(f"""
-from sqlmesh.core.model import model
-from sqlmesh.core.macros import MacroEvaluator
-
-@model(
-    "test.model_a",
-    is_sql=True,
-    kind="FULL"
-)
-def entrypoint(evaluator: MacroEvaluator) -> str:
-    from pathlib import Path
-    import time
-
-    if evaluator.runtime_stage == 'evaluating':
-        while True:
-            if Path("{str(lock_file_path)}").exists():
-                Path("{str(evaluation_lock_file_path)}").touch()
-                print("lock exists; sleeping")
-                time.sleep(2)
-            else:
Path("{str(evaluation_lock_file_path)}").unlink(missing_ok=True) - break - - return "select 'model_a' as m" -""") - - (models_dir / "model_b.sql").write_text(""" - MODEL ( - name test.model_b, - kind FULL - ); - - select a.m as m, 'model_b' as mb from test.model_a as a - """) - - config = Config( - gateways={ - "": GatewayConfig( - connection=DuckDBConnectionConfig(database=str(tmp_path / "db.db")), - state_connection=DuckDBConnectionConfig(database=str(tmp_path / "state.db")), - ) - }, - model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"), - ) - ctx = Context(paths=[tmp_path], config=config) - - ctx.plan(environment="prod", auto_apply=True) - - assert len(ctx.snapshots) == 2 - assert all(s.intervals for s in ctx.snapshots.values()) - - prod_model_a_snapshot_id = ctx.snapshots['"db"."test"."model_a"'].snapshot_id - prod_model_b_snapshot_id = ctx.snapshots['"db"."test"."model_b"'].snapshot_id - - # dev models - # new version of B - (models_dir / "model_b.sql").write_text(""" - MODEL ( - name test.model_b, - kind FULL - ); - - select a.m as m, 'model_b' as mb, 'dev' as dev_version from test.model_a as a - """) - - # add C - (models_dir / "model_c.sql").write_text(""" - MODEL ( - name test.model_c, - kind FULL - ); - - select b.*, 'model_c' as mc from test.model_b as b - """) - - ctx.load() - ctx.plan(environment="dev", auto_apply=True) - - dev_model_b_snapshot_id = ctx.snapshots['"db"."test"."model_b"'].snapshot_id - dev_model_c_snapshot_id = ctx.snapshots['"db"."test"."model_c"'].snapshot_id - - assert dev_model_b_snapshot_id != prod_model_b_snapshot_id - - # now, we restate A in prod but touch the lockfile so it hangs during evaluation - # we also have to do it in its own thread due to the hang - lock_file_path.touch() - - def _run_restatement_plan(tmp_path: Path, config: Config, q: queue.Queue): - q.put("thread_started") - - # give this thread its own Context object to prevent segfaulting the Python interpreter - restatement_ctx = Context(paths=[tmp_path], config=config) - - # dev2 not present before the restatement plan starts - assert restatement_ctx.state_sync.get_environment("dev2") is None - - q.put("plan_started") - plan = restatement_ctx.plan( - environment="prod", restate_models=['"db"."test"."model_a"'], auto_apply=True - ) - q.put("plan_completed") - - # dev2 was created during the restatement plan - assert restatement_ctx.state_sync.get_environment("dev2") is not None - - return plan - - executor = ThreadPoolExecutor() - q: queue.Queue = queue.Queue() - restatement_plan_future = executor.submit(_run_restatement_plan, tmp_path, config, q) - assert q.get() == "thread_started" - - try: - if e := restatement_plan_future.exception(timeout=1): - # abort early if the plan thread threw an exception - raise e - except TimeoutError: - # that's ok, we dont actually expect the plan to have finished in 1 second - pass - - # while that restatement is running, we can simulate another process and check that it sees no empty intervals - assert q.get() == "plan_started" - - # dont check for potentially missing intervals until the plan is in the evaluation loop - attempts = 0 - while not evaluation_lock_file_path.exists(): - time.sleep(2) - attempts += 1 - if attempts > 10: - raise ValueError("Gave up waiting for evaluation loop") - - ctx.clear_caches() # get rid of the file cache so that data is re-fetched from state - prod_models_from_state = ctx.state_sync.get_snapshots( - snapshot_ids=[prod_model_a_snapshot_id, prod_model_b_snapshot_id] - ) - - # prod intervals should be 
-    assert all(m.intervals for m in prod_models_from_state.values())
-
-    # so should dev intervals since prod restatement is still running
-    assert all(m.intervals for m in ctx.snapshots.values())
-
-    # now, lets create a new dev environment "dev2", while the prod restatement plan is still running,
-    # that changes model_b while still being based on the original version of model_a
-    (models_dir / "model_b.sql").write_text("""
-    MODEL (
-        name test.model_b,
-        kind FULL
-    );
-
-    select a.m as m, 'model_b' as mb, 'dev2' as dev_version from test.model_a as a
-    """)
-    ctx.load()
-    ctx.plan(environment="dev2", auto_apply=True)
-
-    dev2_model_b_snapshot_id = ctx.snapshots['"db"."test"."model_b"'].snapshot_id
-    assert dev2_model_b_snapshot_id != dev_model_b_snapshot_id
-    assert dev2_model_b_snapshot_id != prod_model_b_snapshot_id
-
-    # as at this point, everything still has intervals
-    ctx.clear_caches()
-    assert all(
-        s.intervals
-        for s in ctx.state_sync.get_snapshots(
-            snapshot_ids=[
-                prod_model_a_snapshot_id,
-                prod_model_b_snapshot_id,
-                dev_model_b_snapshot_id,
-                dev_model_c_snapshot_id,
-                dev2_model_b_snapshot_id,
-            ]
-        ).values()
-    )
-
-    # now, we finally let that restatement plan complete
-    # first, verify it's still blocked where it should be
-    assert not restatement_plan_future.done()
-
-    lock_file_path.unlink()  # remove lock file, plan should be able to proceed now
-
-    if e := restatement_plan_future.exception():  # blocks until future complete
-        raise e
-
-    assert restatement_plan_future.result()
-    assert q.get() == "plan_completed"
-
-    ctx.clear_caches()
-
-    # check that intervals in prod are present
-    assert all(
-        s.intervals
-        for s in ctx.state_sync.get_snapshots(
-            snapshot_ids=[
-                prod_model_a_snapshot_id,
-                prod_model_b_snapshot_id,
-            ]
-        ).values()
-    )
-
-    # check that intervals in dev have been cleared, including the dev2 env that
-    # was created after the restatement plan started
-    assert all(
-        not s.intervals
-        for s in ctx.state_sync.get_snapshots(
-            snapshot_ids=[
-                dev_model_b_snapshot_id,
-                dev_model_c_snapshot_id,
-                dev2_model_b_snapshot_id,
-            ]
-        ).values()
-    )
-
-    executor.shutdown()
-
-
-def test_restatement_plan_detects_prod_deployment_during_restatement(tmp_path: Path):
-    """
-    Scenario:
-    - `prod` environment exists, model A
-    - `dev` environment created, model A(dev)
-    - Restatement plan is triggered against `prod` for model A
-    - During restatement, someone else deploys A(dev) to prod, replacing the model that is currently being restated.
-
-    Outcome:
-    - The deployment plan for dev -> prod should succeed in deploying the new version of A
-    - The prod restatement plan should fail with a ConflictingPlanError and warn about the model that got updated while undergoing restatement
-    - The new version of A should have no intervals cleared.
-      The user needs to rerun the restatement if the intervals should still be cleared
-    """
-    orig_console = get_console()
-    console = CaptureTerminalConsole()
-    set_console(console)
-
-    models_dir = tmp_path / "models"
-    models_dir.mkdir()
-
-    lock_file_path = tmp_path / "test.lock"  # python model blocks while this file is present
-
-    evaluation_lock_file_path = (
-        tmp_path / "evaluation.lock"
-    )  # python model creates this file if it's in the wait loop and deletes it once done
-
-    # Note: to make execution block so we can test stuff, we use a Python model that blocks until it no longer detects the presence of a file
-    (models_dir / "model_a.py").write_text(f"""
-from sqlmesh.core.model import model
-from sqlmesh.core.macros import MacroEvaluator
-
-@model(
-    "test.model_a",
-    is_sql=True,
-    kind="FULL"
-)
-def entrypoint(evaluator: MacroEvaluator) -> str:
-    from pathlib import Path
-    import time
-
-    if evaluator.runtime_stage == 'evaluating':
-        while True:
-            if Path("{str(lock_file_path)}").exists():
-                Path("{str(evaluation_lock_file_path)}").touch()
-                print("lock exists; sleeping")
-                time.sleep(2)
-            else:
-                Path("{str(evaluation_lock_file_path)}").unlink(missing_ok=True)
-                break
-
-    return "select 'model_a' as m"
-""")
-
-    config = Config(
-        gateways={
-            "": GatewayConfig(
-                connection=DuckDBConnectionConfig(database=str(tmp_path / "db.db")),
-                state_connection=DuckDBConnectionConfig(database=str(tmp_path / "state.db")),
-            )
-        },
-        model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"),
-    )
-    ctx = Context(paths=[tmp_path], config=config)
-
-    # create prod
-    ctx.plan(environment="prod", auto_apply=True)
-    original_prod = ctx.state_sync.get_environment("prod")
-    assert original_prod
-
-    # update model_a for dev
-    (models_dir / "model_a.py").unlink()
-    (models_dir / "model_a.sql").write_text("""
-    MODEL (
-        name test.model_a,
-        kind FULL
-    );
-
-    select 1 as changed
-    """)
-
-    # create dev
-    ctx.load()
-    plan = ctx.plan(environment="dev", auto_apply=True)
-    assert len(plan.modified_snapshots) == 1
-    new_model_a_snapshot_id = list(plan.modified_snapshots)[0]
-
-    # now, trigger a prod restatement plan in a different thread and block it to simulate a long restatement
-    thread_console = None
-
-    def _run_restatement_plan(tmp_path: Path, config: Config, q: queue.Queue):
-        nonlocal thread_console
-        q.put("thread_started")
-
-        # Give this thread its own markdown console to avoid Rich LiveError
-        thread_console = MarkdownConsole()
-        set_console(thread_console)
-
-        # give this thread its own Context object to prevent segfaulting the Python interpreter
-        restatement_ctx = Context(paths=[tmp_path], config=config)
-
-        # ensure dev is present before the restatement plan starts
-        assert restatement_ctx.state_sync.get_environment("dev") is not None
-
-        q.put("plan_started")
-        expected_error = None
-        try:
-            restatement_ctx.plan(
-                environment="prod", restate_models=['"db"."test"."model_a"'], auto_apply=True
-            )
-        except ConflictingPlanError as e:
-            expected_error = e
-
-        q.put("plan_completed")
-        return expected_error
-
-    executor = ThreadPoolExecutor()
-    q: queue.Queue = queue.Queue()
-    lock_file_path.touch()
-
-    restatement_plan_future = executor.submit(_run_restatement_plan, tmp_path, config, q)
-    restatement_plan_future.add_done_callback(lambda _: executor.shutdown())
-
-    assert q.get() == "thread_started"
-
-    try:
-        if e := restatement_plan_future.exception(timeout=1):
-            # abort early if the plan thread threw an exception
-            raise e
-    except TimeoutError:
-        # that's ok, we dont actually expect the plan to have finished in 1 second
-        pass
-
-    assert q.get() == "plan_started"
-
-    # ok, now the prod restatement plan is running, let's deploy dev to prod
-    ctx.plan(environment="prod", auto_apply=True)
-
-    new_prod = ctx.state_sync.get_environment("prod")
-    assert new_prod
-    assert new_prod.plan_id != original_prod.plan_id
-    assert new_prod.previous_plan_id == original_prod.plan_id
-
-    # new prod is deployed but restatement plan is still running
-    assert not restatement_plan_future.done()
-
-    # allow restatement plan to complete
-    lock_file_path.unlink()
-
-    plan_error = restatement_plan_future.result()
-    assert isinstance(plan_error, ConflictingPlanError)
-    assert "please re-apply your plan" in repr(plan_error).lower()
-
-    output = " ".join(re.split("\\s+", thread_console.captured_output, flags=re.UNICODE))  # type: ignore
-    assert (
-        f"The following models had new versions deployed while data was being restated: └── test.model_a"
-        in output
-    )
-
-    # check that no intervals have been cleared from the model_a currently in prod
-    model_a = ctx.state_sync.get_snapshots(snapshot_ids=[new_model_a_snapshot_id])[
-        new_model_a_snapshot_id
-    ]
-    assert isinstance(model_a.node, SqlModel)
-    assert model_a.node.render_query_or_raise().sql() == 'SELECT 1 AS "changed"'
-    assert len(model_a.intervals)
-
-    set_console(orig_console)
-
-
-def test_seed_model_metadata_update_does_not_trigger_backfill(tmp_path: Path):
-    """
-    Scenario:
-    - Create a seed model; perform initial population
-    - Modify the model with a metadata-only change and trigger a plan
-
-    Outcome:
-    - The seed model is modified (metadata-only) but this should NOT trigger backfill
-    - There should be no missing_intervals on the plan to backfill
-    """
-
-    models_path = tmp_path / "models"
-    seeds_path = tmp_path / "seeds"
-    models_path.mkdir()
-    seeds_path.mkdir()
-
-    seed_model_path = models_path / "seed.sql"
-    seed_path = seeds_path / "seed_data.csv"
-
-    seed_path.write_text("\n".join(["id,name", "1,test"]))
-
-    seed_model_path.write_text("""
-    MODEL (
-        name test.source_data,
-        kind SEED (
-            path '../seeds/seed_data.csv'
-        )
-    );
-    """)
-
-    config = Config(
-        gateways={"": GatewayConfig(connection=DuckDBConnectionConfig())},
-        model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"),
-    )
-    ctx = Context(paths=tmp_path, config=config)
-
-    plan = ctx.plan(auto_apply=True)
-
-    original_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"']
-    assert plan.directly_modified == {original_seed_snapshot.snapshot_id}
-    assert plan.metadata_updated == set()
-    assert plan.missing_intervals
-
-    # prove data loaded
-    assert ctx.engine_adapter.fetchall("select id, name from memory.test.source_data") == [
-        (1, "test")
-    ]
-
-    # prove no diff
-    ctx.load()
-    plan = ctx.plan(auto_apply=True)
-    assert not plan.has_changes
-    assert not plan.missing_intervals
-
-    # make metadata-only change
-    seed_model_path.write_text("""
-    MODEL (
-        name test.source_data,
-        kind SEED (
-            path '../seeds/seed_data.csv'
-        ),
-        description 'updated by test'
-    );
-    """)
-
-    ctx.load()
-    plan = ctx.plan(auto_apply=True)
-    assert plan.has_changes
-
-    new_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"']
-    assert (
-        new_seed_snapshot.version == original_seed_snapshot.version
-    )  # should be using the same physical table
-    assert (
-        new_seed_snapshot.snapshot_id != original_seed_snapshot.snapshot_id
-    )  # but still be different due to the metadata change
-    assert plan.directly_modified == set()
-    assert plan.metadata_updated == {new_seed_snapshot.snapshot_id}
-
-    # there should be no missing intervals to backfill since all we did is update a description
-    assert not plan.missing_intervals
-
-    # there should still be no diff or missing intervals in 3 days time
-    assert new_seed_snapshot.model.interval_unit.is_day
-    with time_machine.travel(timedelta(days=3)):
-        ctx.clear_caches()
-        ctx.load()
-        plan = ctx.plan(auto_apply=True)
-        assert not plan.has_changes
-        assert not plan.missing_intervals
-
-    # change seed data
-    seed_path.write_text("\n".join(["id,name", "1,test", "2,updated"]))
-
-    # new plan - NOW we should backfill because data changed
-    ctx.load()
-    plan = ctx.plan(auto_apply=True)
-    assert plan.has_changes
-
-    updated_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"']
-
-    assert (
-        updated_seed_snapshot.snapshot_id
-        != new_seed_snapshot.snapshot_id
-        != original_seed_snapshot.snapshot_id
-    )
-    assert not updated_seed_snapshot.forward_only
-    assert plan.directly_modified == {updated_seed_snapshot.snapshot_id}
-    assert plan.metadata_updated == set()
-    assert plan.missing_intervals
-
-    # prove backfilled data loaded
-    assert ctx.engine_adapter.fetchall("select id, name from memory.test.source_data") == [
-        (1, "test"),
-        (2, "updated"),
-    ]

From e3e57d5ddd2a26a702076c270ad38e3b80939d4c Mon Sep 17 00:00:00 2001
From: Iaroslav Zeigerman
Date: Tue, 23 Sep 2025 18:24:59 -0700
Subject: [PATCH 014/173] Chore: Remove dead code

---
 tests/core/integration/test_forward_only.py | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/tests/core/integration/test_forward_only.py b/tests/core/integration/test_forward_only.py
index 4d61915305..2dddf18efd 100644
--- a/tests/core/integration/test_forward_only.py
+++ b/tests/core/integration/test_forward_only.py
@@ -5,7 +5,6 @@
 import pandas as pd  # noqa: TID253
 import pytest
 import time_machine
-from pytest_mock.plugin import MockerFixture

 from sqlmesh.core import dialect as d
 from sqlmesh.core.context import Context
@@ -15,7 +14,7 @@
     SqlModel,
     load_sql_based_model,
 )
-from sqlmesh.core.plan import PlanBuilder, SnapshotIntervals
+from sqlmesh.core.plan import SnapshotIntervals
 from sqlmesh.core.snapshot import (
     SnapshotChangeCategory,
 )
@@ -25,18 +24,6 @@
 pytestmark = pytest.mark.slow


-@pytest.fixture(autouse=True)
-def mock_choices(mocker: MockerFixture):
-    mocker.patch("sqlmesh.core.console.TerminalConsole._get_snapshot_change_category")
-    mocker.patch("sqlmesh.core.console.TerminalConsole._prompt_backfill")
-
-
-def plan_choice(plan_builder: PlanBuilder, choice: SnapshotChangeCategory) -> None:
-    for snapshot in plan_builder.build().snapshots.values():
-        if not snapshot.version:
-            plan_builder.set_choice(snapshot, choice)
-
-
 @time_machine.travel("2023-01-08 15:00:00 UTC")
 @pytest.mark.parametrize(
     "context_fixture",

From aa515c543723077ec647d71da3c3e8725e9fbf18 Mon Sep 17 00:00:00 2001
From: Jo <46752250+georgesittas@users.noreply.github.com>
Date: Wed, 24 Sep 2025 12:17:00 +0300
Subject: [PATCH 015/173] Fix: warn on dbt variable definition failure instead of raising (#5427)

---
 sqlmesh/dbt/context.py                        | 5 ++++-
 tests/dbt/test_config.py                      | 2 ++
 tests/fixtures/dbt/sushi_test/dbt_project.yml | 3 +++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/sqlmesh/dbt/context.py b/sqlmesh/dbt/context.py
index a56a6ca4d6..67e70d3c79 100644
--- a/sqlmesh/dbt/context.py
+++ b/sqlmesh/dbt/context.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import logging
 import typing as t
dataclasses import dataclass, field, replace from pathlib import Path @@ -28,6 +29,8 @@ from sqlmesh.dbt.seed import SeedConfig from sqlmesh.dbt.source import SourceConfig +logger = logging.getLogger(__name__) + @dataclass class DbtContext: @@ -125,7 +128,7 @@ def _var(name: str, default: t.Optional[t.Any] = None) -> t.Any: try: rendered_variables[k] = _render_var(v) except Exception as ex: - raise ConfigError(f"Failed to render variable '{k}', value '{v}': {ex}") from ex + logger.warning(f"Failed to render variable '{k}', value '{v}': {ex}") self.variables = rendered_variables diff --git a/tests/dbt/test_config.py b/tests/dbt/test_config.py index 0e96024aa1..c484b8e126 100644 --- a/tests/dbt/test_config.py +++ b/tests/dbt/test_config.py @@ -352,6 +352,7 @@ def test_variables(assert_exp_eq, sushi_test_project): "some_var": ["foo", "bar"], }, "some_var": "should be overridden in customers package", + "invalid_var": "{{ ref('ref_without_closing_paren' }}", } expected_customer_variables = { "some_var": ["foo", "bar"], # Takes precedence over the root project variable @@ -370,6 +371,7 @@ def test_variables(assert_exp_eq, sushi_test_project): {"name": "item1", "value": 1}, {"name": "item2", "value": 2}, ], + "invalid_var": "{{ ref('ref_without_closing_paren' }}", } assert sushi_test_project.packages["sushi"].variables == expected_sushi_variables assert sushi_test_project.packages["customers"].variables == expected_customer_variables diff --git a/tests/fixtures/dbt/sushi_test/dbt_project.yml b/tests/fixtures/dbt/sushi_test/dbt_project.yml index 920dea7216..0b5f6b0f83 100644 --- a/tests/fixtures/dbt/sushi_test/dbt_project.yml +++ b/tests/fixtures/dbt/sushi_test/dbt_project.yml @@ -66,6 +66,9 @@ vars: - name: 'item2' value: 2 + # Despite this being an invalid variable definition, dbt doesn't mind if it's unused + invalid_var: "{{ ref('ref_without_closing_paren' }}" + on-run-start: - 'CREATE TABLE IF NOT EXISTS analytic_stats (physical_table VARCHAR, evaluation_time VARCHAR);' From 0bda998a2138959afd7a0083772900203af2514f Mon Sep 17 00:00:00 2001 From: Vincent Chan Date: Wed, 24 Sep 2025 12:00:10 -0700 Subject: [PATCH 016/173] Fix: Include root package in search candidates when resolving dbt macros (#5349) --- sqlmesh/dbt/adapter.py | 13 +++++++++---- tests/dbt/test_adapter.py | 1 + tests/fixtures/dbt/sushi_test/macros/distinct.sql | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 tests/fixtures/dbt/sushi_test/macros/distinct.sql diff --git a/sqlmesh/dbt/adapter.py b/sqlmesh/dbt/adapter.py index 12e38e4749..7f7c7eb4fb 100644 --- a/sqlmesh/dbt/adapter.py +++ b/sqlmesh/dbt/adapter.py @@ -139,10 +139,15 @@ def _relevance(package_name_pair: t.Tuple[t.Optional[str], str]) -> t.Tuple[int, return name_score, package_score jinja_env = self.jinja_macros.build_environment(**self.jinja_globals).globals - packages_to_check: t.List[t.Optional[str]] = [ - macro_namespace, - *(k for k in jinja_env if k.startswith("dbt")), - ] + + packages_to_check: t.List[t.Optional[str]] = [None] + if macro_namespace is not None: + if macro_namespace in jinja_env: + packages_to_check = [self.jinja_macros.root_package_name, macro_namespace] + + # Add dbt packages as fallback + packages_to_check.extend(k for k in jinja_env if k.startswith("dbt")) + candidates = {} for macro_package in packages_to_check: macros = jinja_env.get(macro_package, {}) if macro_package else jinja_env diff --git a/tests/dbt/test_adapter.py b/tests/dbt/test_adapter.py index 5617d8c5c3..381401ce73 100644 --- 
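The reordered candidate list above makes the root project the first namespace consulted whenever an explicit `macro_namespace` is given, so a root-level macro can override a package's `default__` implementation, with the dbt builtin packages remaining the fallback. A minimal sketch of the resulting precedence, assuming a plain dict of macro namespaces (hypothetical names, not the real Jinja environment object):

    # Hypothetical namespaces: the root project overrides the "customers" package.
    jinja_env = {
        "root_project": {"default__select_distinct": "root override"},
        "customers": {"default__select_distinct": "package default"},
        "dbt": {},
    }

    def resolve(name, macro_namespace=None, root_package="root_project"):
        packages_to_check = [None]
        if macro_namespace is not None and macro_namespace in jinja_env:
            packages_to_check = [root_package, macro_namespace]
        # dbt builtin packages are always appended as the fallback
        packages_to_check.extend(k for k in jinja_env if k.startswith("dbt"))
        for package in packages_to_check:
            # the real code falls back to the whole global env when package is None
            macros = jinja_env.get(package, {}) if package else {}
            if name in macros:
                return package, macros[name]
        return None

    assert resolve("default__select_distinct", "customers")[0] == "root_project"
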
a/tests/dbt/test_adapter.py +++ b/tests/dbt/test_adapter.py @@ -242,6 +242,7 @@ def test_adapter_dispatch(sushi_test_project: Project, runtime_renderer: t.Calla assert renderer("{{ adapter.dispatch('current_engine', 'customers')() }}") == "duckdb" assert renderer("{{ adapter.dispatch('current_timestamp')() }}") == "now()" assert renderer("{{ adapter.dispatch('current_timestamp', 'dbt')() }}") == "now()" + assert renderer("{{ adapter.dispatch('select_distinct', 'customers')() }}") == "distinct" # test with keyword arguments assert ( diff --git a/tests/fixtures/dbt/sushi_test/macros/distinct.sql b/tests/fixtures/dbt/sushi_test/macros/distinct.sql new file mode 100644 index 0000000000..1b339a9349 --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/macros/distinct.sql @@ -0,0 +1 @@ +{% macro default__select_distinct() %}distinct{% endmacro %} From 368c5ddf84e15fa9786745891ce5ed9ff4ebe59a Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Thu, 25 Sep 2025 09:07:49 +1200 Subject: [PATCH 017/173] Feat(sqlmesh_dbt): Select based on dbt name, not sqlmesh name (#5420) --- sqlmesh/core/context.py | 6 +- sqlmesh/core/selector.py | 77 ++++++++++++- sqlmesh_dbt/operations.py | 5 +- tests/core/test_selector.py | 20 ++-- tests/dbt/cli/test_list.py | 8 +- tests/dbt/cli/test_operations.py | 10 +- tests/dbt/cli/test_run.py | 2 +- tests/dbt/cli/test_selectors.py | 192 +++++++++++++++++++++++++++++++ tests/dbt/conftest.py | 3 +- 9 files changed, 293 insertions(+), 30 deletions(-) diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 437fbd6edd..e3feb1e14b 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -93,7 +93,7 @@ from sqlmesh.core.reference import ReferenceGraph from sqlmesh.core.scheduler import Scheduler, CompletionStatus from sqlmesh.core.schema_loader import create_external_models_file -from sqlmesh.core.selector import Selector +from sqlmesh.core.selector import Selector, NativeSelector from sqlmesh.core.snapshot import ( DeployabilityIndex, Snapshot, @@ -368,6 +368,7 @@ def __init__( load: bool = True, users: t.Optional[t.List[User]] = None, config_loader_kwargs: t.Optional[t.Dict[str, t.Any]] = None, + selector: t.Optional[t.Type[Selector]] = None, ): self.configs = ( config @@ -390,6 +391,7 @@ def __init__( self._engine_adapter: t.Optional[EngineAdapter] = None self._linters: t.Dict[str, Linter] = {} self._loaded: bool = False + self._selector_cls = selector or NativeSelector self.path, self.config = t.cast(t.Tuple[Path, C], next(iter(self.configs.items()))) @@ -2893,7 +2895,7 @@ def _new_state_sync(self) -> StateSync: def _new_selector( self, models: t.Optional[UniqueKeyDict[str, Model]] = None, dag: t.Optional[DAG[str]] = None ) -> Selector: - return Selector( + return self._selector_cls( self.state_reader, models=models or self._models, context_path=self.path, diff --git a/sqlmesh/core/selector.py b/sqlmesh/core/selector.py index c44065bdc0..1484d06cee 100644 --- a/sqlmesh/core/selector.py +++ b/sqlmesh/core/selector.py @@ -3,6 +3,8 @@ import fnmatch import typing as t from pathlib import Path +from itertools import zip_longest +import abc from sqlglot import exp from sqlglot.errors import ParseError @@ -26,7 +28,7 @@ from sqlmesh.core.state_sync import StateReader -class Selector: +class Selector(abc.ABC): def __init__( self, state_reader: StateReader, @@ -167,13 +169,13 @@ def get_model(fqn: str) -> t.Optional[Model]: def expand_model_selections( self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str, Model]] = None ) -> t.Set[str]: - 
"""Expands a set of model selections into a set of model names. + """Expands a set of model selections into a set of model fqns that can be looked up in the Context. Args: model_selections: A set of model selections. Returns: - A set of model names. + A set of model fqns. """ node = parse(" | ".join(f"({s})" for s in model_selections)) @@ -194,10 +196,9 @@ def evaluate(node: exp.Expression) -> t.Set[str]: return { fqn for fqn, model in all_models.items() - if fnmatch.fnmatchcase(model.name, node.this) + if fnmatch.fnmatchcase(self._model_name(model), node.this) } - fqn = normalize_model_name(pattern, self._default_catalog, self._dialect) - return {fqn} if fqn in all_models else set() + return self._pattern_to_model_fqns(pattern, all_models) if isinstance(node, exp.And): return evaluate(node.left) & evaluate(node.right) if isinstance(node, exp.Or): @@ -241,6 +242,70 @@ def evaluate(node: exp.Expression) -> t.Set[str]: return evaluate(node) + @abc.abstractmethod + def _model_name(self, model: Model) -> str: + """Given a model, return the name that a selector pattern contining wildcards should be fnmatch'd on""" + pass + + @abc.abstractmethod + def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Model]) -> t.Set[str]: + """Given a pattern, return the keys of the matching models from :all_models""" + pass + + +class NativeSelector(Selector): + """Implementation of selectors that matches objects based on SQLMesh native names""" + + def _model_name(self, model: Model) -> str: + return model.name + + def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Model]) -> t.Set[str]: + fqn = normalize_model_name(pattern, self._default_catalog, self._dialect) + return {fqn} if fqn in all_models else set() + + +class DbtSelector(Selector): + """Implementation of selectors that matches objects based on the DBT names instead of the SQLMesh native names""" + + def _model_name(self, model: Model) -> str: + if dbt_fqn := model.dbt_fqn: + return dbt_fqn + raise SQLMeshError("dbt node information must be populated to use dbt selectors") + + def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Model]) -> t.Set[str]: + # a pattern like "staging.customers" should match a model called "jaffle_shop.staging.customers" + # but not a model called "jaffle_shop.customers.staging" + # also a pattern like "aging" should not match "staging" so we need to consider components; not substrings + pattern_components = pattern.split(".") + first_pattern_component = pattern_components[0] + matches = set() + for fqn, model in all_models.items(): + if not model.dbt_fqn: + continue + + dbt_fqn_components = model.dbt_fqn.split(".") + try: + starting_idx = dbt_fqn_components.index(first_pattern_component) + except ValueError: + continue + for pattern_component, fqn_component in zip_longest( + pattern_components, dbt_fqn_components[starting_idx:] + ): + if pattern_component and not fqn_component: + # the pattern still goes but we have run out of fqn components to match; no match + break + if fqn_component and not pattern_component: + # all elements of the pattern have matched elements of the fqn; match + matches.add(fqn) + break + if pattern_component != fqn_component: + # the pattern explicitly doesnt match a component; no match + break + else: + # called if no explicit break, indicating all components of the pattern matched all components of the fqn + matches.add(fqn) + return matches + class SelectorDialect(Dialect): IDENTIFIERS_CAN_START_WITH_DIGIT = True diff --git 
a/sqlmesh_dbt/operations.py b/sqlmesh_dbt/operations.py index e15a2cb93e..a157705ffd 100644 --- a/sqlmesh_dbt/operations.py +++ b/sqlmesh_dbt/operations.py @@ -185,7 +185,7 @@ def _plan_builder_options( options.update( dict( # Add every selected model as a restatement to force them to get repopulated from scratch - restate_models=list(self.context.models) + restate_models=[m.dbt_fqn for m in self.context.models.values() if m.dbt_fqn] if not select_models else select_models, # by default in SQLMesh, restatements only operate on what has been committed to state. @@ -231,6 +231,7 @@ def create( from sqlmesh.core.console import set_console from sqlmesh_dbt.console import DbtCliConsole from sqlmesh.utils.errors import SQLMeshError + from sqlmesh.core.selector import DbtSelector # clear any existing handlers set up by click/rich as defaults so that once SQLMesh logging config is applied, # we dont get duplicate messages logged from things like console.log_warning() @@ -250,6 +251,8 @@ def create( paths=[project_dir], config_loader_kwargs=dict(profile=profile, target=target, variables=vars), load=True, + # DbtSelector selects based on dbt model fqn's rather than SQLMesh model names + selector=DbtSelector, ) dbt_loader = sqlmesh_context._loaders[0] diff --git a/tests/core/test_selector.py b/tests/core/test_selector.py index 80b9ef691e..46d666db64 100644 --- a/tests/core/test_selector.py +++ b/tests/core/test_selector.py @@ -12,7 +12,7 @@ from sqlmesh.core.environment import Environment from sqlmesh.core.model import Model, SqlModel from sqlmesh.core.model.common import ParsableSql -from sqlmesh.core.selector import Selector +from sqlmesh.core.selector import NativeSelector from sqlmesh.core.snapshot import SnapshotChangeCategory from sqlmesh.utils import UniqueKeyDict from sqlmesh.utils.date import now_timestamp @@ -88,7 +88,7 @@ def test_select_models(mocker: MockerFixture, make_snapshot, default_catalog: t. 
local_models[modified_model_v2.fqn] = modified_model_v2.copy( update={"mapping_schema": added_model_schema} ) - selector = Selector(state_reader_mock, local_models, default_catalog=default_catalog) + selector = NativeSelector(state_reader_mock, local_models, default_catalog=default_catalog) _assert_models_equal( selector.select_models(["db.added_model"], env_name), @@ -243,7 +243,7 @@ def test_select_models_expired_environment(mocker: MockerFixture, make_snapshot) local_models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") local_models[modified_model_v2.fqn] = modified_model_v2 - selector = Selector(state_reader_mock, local_models) + selector = NativeSelector(state_reader_mock, local_models) _assert_models_equal( selector.select_models(["*.modified_model"], env_name, fallback_env_name="prod"), @@ -305,7 +305,7 @@ def test_select_change_schema(mocker: MockerFixture, make_snapshot): local_child = child.copy(update={"mapping_schema": {'"db"': {'"parent"': {"b": "INT"}}}}) local_models[local_child.fqn] = local_child - selector = Selector(state_reader_mock, local_models) + selector = NativeSelector(state_reader_mock, local_models) selected = selector.select_models(["db.parent"], env_name) assert selected[local_child.fqn].render_query() != child.render_query() @@ -339,7 +339,7 @@ def test_select_models_missing_env(mocker: MockerFixture, make_snapshot): local_models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") local_models[model.fqn] = model - selector = Selector(state_reader_mock, local_models) + selector = NativeSelector(state_reader_mock, local_models) assert selector.select_models([model.name], "missing_env").keys() == {model.fqn} assert not selector.select_models(["missing"], "missing_env") @@ -563,7 +563,7 @@ def test_expand_model_selections( ) models[model.fqn] = model - selector = Selector(mocker.Mock(), models) + selector = NativeSelector(mocker.Mock(), models) assert selector.expand_model_selections(selections) == output @@ -576,7 +576,7 @@ def test_model_selection_normalized(mocker: MockerFixture, make_snapshot): dialect="bigquery", ) models[model.fqn] = model - selector = Selector(mocker.Mock(), models, dialect="bigquery") + selector = NativeSelector(mocker.Mock(), models, dialect="bigquery") assert selector.expand_model_selections(["db.test_Model"]) == {'"db"."test_Model"'} @@ -624,7 +624,7 @@ def test_expand_git_selection( git_client_mock.list_uncommitted_changed_files.return_value = [] git_client_mock.list_committed_changed_files.return_value = [model_a._path, model_c._path] - selector = Selector(mocker.Mock(), models) + selector = NativeSelector(mocker.Mock(), models) selector._git_client = git_client_mock assert selector.expand_model_selections(expressions) == expected_fqns @@ -658,7 +658,7 @@ def test_select_models_with_external_parent(mocker: MockerFixture): local_models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") local_models[added_model.fqn] = added_model - selector = Selector(state_reader_mock, local_models, default_catalog=default_catalog) + selector = NativeSelector(state_reader_mock, local_models, default_catalog=default_catalog) expanded_selections = selector.expand_model_selections(["+*added_model*"]) assert expanded_selections == {added_model.fqn} @@ -699,7 +699,7 @@ def test_select_models_local_tags_take_precedence_over_remote( local_models[local_existing.fqn] = local_existing local_models[local_new.fqn] = local_new - selector = Selector(state_reader_mock, local_models) + selector = NativeSelector(state_reader_mock, local_models) selected = 
selector.select_models(["tag:a"], env_name) diff --git a/tests/dbt/cli/test_list.py b/tests/dbt/cli/test_list.py index 4d294decc1..712d80b2fe 100644 --- a/tests/dbt/cli/test_list.py +++ b/tests/dbt/cli/test_list.py @@ -19,7 +19,7 @@ def test_list(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): def test_list_select(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): - result = invoke_cli(["list", "--select", "main.raw_customers+"]) + result = invoke_cli(["list", "--select", "raw_customers+"]) assert result.exit_code == 0 assert not result.exception @@ -34,7 +34,7 @@ def test_list_select(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Resul def test_list_select_exclude(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): # single exclude - result = invoke_cli(["list", "--select", "main.raw_customers+", "--exclude", "main.orders"]) + result = invoke_cli(["list", "--select", "raw_customers+", "--exclude", "orders"]) assert result.exit_code == 0 assert not result.exception @@ -49,8 +49,8 @@ def test_list_select_exclude(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[.. # multiple exclude for args in ( - ["--select", "main.stg_orders+", "--exclude", "main.customers", "--exclude", "main.orders"], - ["--select", "main.stg_orders+", "--exclude", "main.customers main.orders"], + ["--select", "stg_orders+", "--exclude", "customers", "--exclude", "orders"], + ["--select", "stg_orders+", "--exclude", "customers orders"], ): result = invoke_cli(["list", *args]) assert result.exit_code == 0 diff --git a/tests/dbt/cli/test_operations.py b/tests/dbt/cli/test_operations.py index 769887efe4..b23c87882a 100644 --- a/tests/dbt/cli/test_operations.py +++ b/tests/dbt/cli/test_operations.py @@ -138,7 +138,7 @@ def test_run_option_mapping(jaffle_shop_duckdb: Path): assert plan.selected_models_to_backfill is None assert {s.name for s in plan.snapshots} == {k for k in operations.context.snapshots} - plan = operations.run(select=["main.stg_orders+"]) + plan = operations.run(select=["stg_orders+"]) assert plan.environment.name == "prod" assert console.no_prompts is True assert console.no_diff is True @@ -155,7 +155,7 @@ def test_run_option_mapping(jaffle_shop_duckdb: Path): plan.selected_models_to_backfill | {standalone_audit_name} ) - plan = operations.run(select=["main.stg_orders+"], exclude=["main.customers"]) + plan = operations.run(select=["stg_orders+"], exclude=["customers"]) assert plan.environment.name == "prod" assert console.no_prompts is True assert console.no_diff is True @@ -171,7 +171,7 @@ def test_run_option_mapping(jaffle_shop_duckdb: Path): plan.selected_models_to_backfill | {standalone_audit_name} ) - plan = operations.run(exclude=["main.customers"]) + plan = operations.run(exclude=["customers"]) assert plan.environment.name == "prod" assert console.no_prompts is True assert console.no_diff is True @@ -238,7 +238,7 @@ def test_run_option_mapping_dev(jaffle_shop_duckdb: Path): assert plan.skip_backfill is True assert plan.selected_models_to_backfill == {'"jaffle_shop"."main"."new_model"'} - plan = operations.run(environment="dev", select=["main.stg_orders+"]) + plan = operations.run(environment="dev", select=["stg_orders+"]) assert plan.environment.name == "dev" assert console.no_prompts is True assert console.no_diff is True @@ -325,7 +325,7 @@ def test_run_option_full_refresh_with_selector(jaffle_shop_duckdb: Path): console = PlanCapturingConsole() operations.context.console = console - plan = operations.run(select=["main.stg_customers"], 
full_refresh=True) + plan = operations.run(select=["stg_customers"], full_refresh=True) assert len(plan.restatements) == 1 assert list(plan.restatements)[0].name == '"jaffle_shop"."main"."stg_customers"' diff --git a/tests/dbt/cli/test_run.py b/tests/dbt/cli/test_run.py index 788a7b04a8..7aeb8dd4d7 100644 --- a/tests/dbt/cli/test_run.py +++ b/tests/dbt/cli/test_run.py @@ -27,7 +27,7 @@ def test_run_with_selectors(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[... assert result.exit_code == 0 assert "main.orders" in result.output - result = invoke_cli(["run", "--select", "main.raw_customers+", "--exclude", "main.orders"]) + result = invoke_cli(["run", "--select", "raw_customers+", "--exclude", "orders"]) assert result.exit_code == 0 assert not result.exception diff --git a/tests/dbt/cli/test_selectors.py b/tests/dbt/cli/test_selectors.py index 6041a50d0a..99907bda84 100644 --- a/tests/dbt/cli/test_selectors.py +++ b/tests/dbt/cli/test_selectors.py @@ -1,6 +1,9 @@ import typing as t import pytest from sqlmesh_dbt import selectors +from sqlmesh.core.selector import DbtSelector +from sqlmesh.core.context import Context +from pathlib import Path @pytest.mark.parametrize( @@ -77,3 +80,192 @@ def test_split_unions_and_intersections( expression: str, expected: t.Tuple[t.List[str], t.List[str]] ): assert selectors._split_unions_and_intersections(expression) == expected + + +@pytest.mark.parametrize( + "dbt_select,expected", + [ + (["aging"], set()), + ( + ["staging"], + { + '"jaffle_shop"."main"."stg_customers"', + '"jaffle_shop"."main"."stg_orders"', + '"jaffle_shop"."main"."stg_payments"', + }, + ), + (["staging.stg_customers"], {'"jaffle_shop"."main"."stg_customers"'}), + (["stg_customers.staging"], set()), + ( + ["+customers"], + { + '"jaffle_shop"."main"."customers"', + '"jaffle_shop"."main"."stg_customers"', + '"jaffle_shop"."main"."stg_orders"', + '"jaffle_shop"."main"."stg_payments"', + '"jaffle_shop"."main"."raw_customers"', + '"jaffle_shop"."main"."raw_orders"', + '"jaffle_shop"."main"."raw_payments"', + }, + ), + (["customers+"], {'"jaffle_shop"."main"."customers"'}), + ( + ["customers+", "stg_orders"], + {'"jaffle_shop"."main"."customers"', '"jaffle_shop"."main"."stg_orders"'}, + ), + (["*.staging.stg_c*"], {'"jaffle_shop"."main"."stg_customers"'}), + (["tag:agg"], {'"jaffle_shop"."main"."agg_orders"'}), + ( + ["staging.stg_customers", "tag:agg"], + { + '"jaffle_shop"."main"."stg_customers"', + '"jaffle_shop"."main"."agg_orders"', + }, + ), + ( + ["+tag:agg"], + { + '"jaffle_shop"."main"."agg_orders"', + '"jaffle_shop"."main"."orders"', + '"jaffle_shop"."main"."stg_orders"', + '"jaffle_shop"."main"."stg_payments"', + '"jaffle_shop"."main"."raw_orders"', + '"jaffle_shop"."main"."raw_payments"', + }, + ), + ( + ["tag:agg+"], + { + '"jaffle_shop"."main"."agg_orders"', + }, + ), + ( + ["tag:b*"], + set(), + ), + ( + ["tag:a*"], + { + '"jaffle_shop"."main"."agg_orders"', + }, + ), + ], +) +def test_select_by_dbt_names( + jaffle_shop_duckdb: Path, + jaffle_shop_duckdb_context: Context, + dbt_select: t.List[str], + expected: t.Set[str], +): + (jaffle_shop_duckdb / "models" / "agg_orders.sql").write_text(""" + {{ config(tags=["agg"]) }} + select order_date, count(*) as num_orders from {{ ref('orders') }} + """) + + ctx = jaffle_shop_duckdb_context + ctx.load() + assert '"jaffle_shop"."main"."agg_orders"' in ctx.models + + selector = ctx._new_selector() + assert isinstance(selector, DbtSelector) + + sqlmesh_selector = selectors.to_sqlmesh(dbt_select=dbt_select, dbt_exclude=[]) + assert 
sqlmesh_selector + + assert selector.expand_model_selections([sqlmesh_selector]) == expected + + +@pytest.mark.parametrize( + "dbt_exclude,expected", + [ + (["jaffle_shop"], set()), + ( + ["staging"], + { + '"jaffle_shop"."main"."agg_orders"', + '"jaffle_shop"."main"."customers"', + '"jaffle_shop"."main"."orders"', + '"jaffle_shop"."main"."raw_customers"', + '"jaffle_shop"."main"."raw_orders"', + '"jaffle_shop"."main"."raw_payments"', + }, + ), + (["+customers"], {'"jaffle_shop"."main"."orders"', '"jaffle_shop"."main"."agg_orders"'}), + ( + ["+tag:agg"], + { + '"jaffle_shop"."main"."customers"', + '"jaffle_shop"."main"."stg_customers"', + '"jaffle_shop"."main"."raw_customers"', + }, + ), + ], +) +def test_exclude_by_dbt_names( + jaffle_shop_duckdb: Path, + jaffle_shop_duckdb_context: Context, + dbt_exclude: t.List[str], + expected: t.Set[str], +): + (jaffle_shop_duckdb / "models" / "agg_orders.sql").write_text(""" + {{ config(tags=["agg"]) }} + select order_date, count(*) as num_orders from {{ ref('orders') }} + """) + + ctx = jaffle_shop_duckdb_context + ctx.load() + assert '"jaffle_shop"."main"."agg_orders"' in ctx.models + + selector = ctx._new_selector() + assert isinstance(selector, DbtSelector) + + sqlmesh_selector = selectors.to_sqlmesh(dbt_select=[], dbt_exclude=dbt_exclude) + assert sqlmesh_selector + + assert selector.expand_model_selections([sqlmesh_selector]) == expected + + +@pytest.mark.parametrize( + "dbt_select,dbt_exclude,expected", + [ + (["jaffle_shop"], ["jaffle_shop"], set()), + ( + ["staging"], + ["stg_customers"], + { + '"jaffle_shop"."main"."stg_orders"', + '"jaffle_shop"."main"."stg_payments"', + }, + ), + ( + ["staging.stg_customers", "tag:agg"], + ["tag:agg"], + { + '"jaffle_shop"."main"."stg_customers"', + }, + ), + ], +) +def test_selection_and_exclusion_by_dbt_names( + jaffle_shop_duckdb: Path, + jaffle_shop_duckdb_context: Context, + dbt_select: t.List[str], + dbt_exclude: t.List[str], + expected: t.Set[str], +): + (jaffle_shop_duckdb / "models" / "agg_orders.sql").write_text(""" + {{ config(tags=["agg"]) }} + select order_date, count(*) as num_orders from {{ ref('orders') }} + """) + + ctx = jaffle_shop_duckdb_context + ctx.load() + assert '"jaffle_shop"."main"."agg_orders"' in ctx.models + + selector = ctx._new_selector() + assert isinstance(selector, DbtSelector) + + sqlmesh_selector = selectors.to_sqlmesh(dbt_select=dbt_select, dbt_exclude=dbt_exclude) + assert sqlmesh_selector + + assert selector.expand_model_selections([sqlmesh_selector]) == expected diff --git a/tests/dbt/conftest.py b/tests/dbt/conftest.py index 56d77e7496..846dfc6aa9 100644 --- a/tests/dbt/conftest.py +++ b/tests/dbt/conftest.py @@ -7,6 +7,7 @@ import pytest from sqlmesh.core.context import Context +from sqlmesh.core.selector import DbtSelector from sqlmesh.dbt.context import DbtContext from sqlmesh.dbt.project import Project from sqlmesh.dbt.target import PostgresConfig @@ -99,7 +100,7 @@ def jaffle_shop_duckdb(copy_to_temp_path: t.Callable[..., t.List[Path]]) -> t.It @pytest.fixture def jaffle_shop_duckdb_context(jaffle_shop_duckdb: Path) -> Context: init_project_if_required(jaffle_shop_duckdb) - return Context(paths=[jaffle_shop_duckdb]) + return Context(paths=[jaffle_shop_duckdb], selector=DbtSelector) @pytest.fixture() From 92e4a32ed61dbf6106935c76000ad09c2fd2ffa5 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Thu, 25 Sep 2025 15:09:58 +0300 Subject: [PATCH 018/173] Chore!: bump sqlglot to v27.18.0 (#5439) --- 
pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b3e13b63ee..35376dd095 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "requests", "rich[jupyter]", "ruamel.yaml", - "sqlglot[rs]~=27.17.0", + "sqlglot[rs]~=27.18.0", "tenacity", "time-machine", "json-stream" From 15a0e1002e995999ef93f249b45cdcd9d4f53718 Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:40:02 -0700 Subject: [PATCH 019/173] fix: include forward_only parsed snapshot (#5442) --- sqlmesh/core/state_sync/db/migrator.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sqlmesh/core/state_sync/db/migrator.py b/sqlmesh/core/state_sync/db/migrator.py index b803a5cc40..3e3f978b96 100644 --- a/sqlmesh/core/state_sync/db/migrator.py +++ b/sqlmesh/core/state_sync/db/migrator.py @@ -229,6 +229,7 @@ def _migrate_snapshot_rows( "updated_ts": updated_ts, "unpaused_ts": unpaused_ts, "unrestorable": unrestorable, + "forward_only": forward_only, } for where in ( snapshot_id_filter( @@ -237,10 +238,16 @@ def _migrate_snapshot_rows( if snapshots is not None else [None] ) - for name, identifier, raw_snapshot, updated_ts, unpaused_ts, unrestorable in fetchall( + for name, identifier, raw_snapshot, updated_ts, unpaused_ts, unrestorable, forward_only in fetchall( self.engine_adapter, exp.select( - "name", "identifier", "snapshot", "updated_ts", "unpaused_ts", "unrestorable" + "name", + "identifier", + "snapshot", + "updated_ts", + "unpaused_ts", + "unrestorable", + "forward_only", ) .from_(self.snapshot_state.snapshots_table) .where(where) From d15446c7912c3703afe18587931f4dc5eeae6130 Mon Sep 17 00:00:00 2001 From: Trey Spiller <1831878+treysp@users.noreply.github.com> Date: Thu, 25 Sep 2025 17:24:36 -0500 Subject: [PATCH 020/173] Fix: route linter warnings correctly in github output (#5441) --- sqlmesh/core/console.py | 5 +- .../github/cicd/test_github_controller.py | 59 ++++++++++++++++--- 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/sqlmesh/core/console.py b/sqlmesh/core/console.py index d9567ae484..8af837b08a 100644 --- a/sqlmesh/core/console.py +++ b/sqlmesh/core/console.py @@ -3641,7 +3641,10 @@ def show_linter_violations( msg = f"\nLinter {severity} for `{model._path}`:\n{violations_msg}\n" self._print(msg) - self._errors.append(msg) + if is_error: + self._errors.append(msg) + else: + self._warnings.append(msg) @property def captured_warnings(self) -> str: diff --git a/tests/integrations/github/cicd/test_github_controller.py b/tests/integrations/github/cicd/test_github_controller.py index a27f75f459..8242d697b6 100644 --- a/tests/integrations/github/cicd/test_github_controller.py +++ b/tests/integrations/github/cicd/test_github_controller.py @@ -15,6 +15,7 @@ from sqlmesh.core.model import SqlModel from sqlmesh.core.user import User, UserRole from sqlmesh.core.plan.definition import Plan +from sqlmesh.core.linter.rule import RuleViolation from sqlmesh.integrations.github.cicd.config import GithubCICDBotConfig, MergeMethod from sqlmesh.integrations.github.cicd.controller import ( BotCommand, @@ -29,6 +30,29 @@ pytestmark = pytest.mark.github + +def add_linter_violations(controller: GithubController): + class _MockModel: + _path = "tests/linter_test.sql" + + class _MockLinterRule: + name = "mock_linter_rule" + + controller._console.show_linter_violations( + [ + RuleViolation( + rule=_MockLinterRule(), 
violation_msg="Linter warning", violation_range=None + ) + ], + _MockModel(), + ) + controller._console.show_linter_violations( + [RuleViolation(rule=_MockLinterRule(), violation_msg="Linter error", violation_range=None)], + _MockModel(), + is_error=True, + ) + + github_controller_approvers_params = [ ( "2 approvers, 1 required", @@ -660,12 +684,18 @@ def test_get_plan_summary_includes_warnings_and_errors( controller._console.log_warning("Warning 1\nWith multiline") controller._console.log_warning("Warning 2") controller._console.log_error("Error 1") + add_linter_violations(controller) summary = controller.get_plan_summary(controller.prod_plan) - assert ("> [!WARNING]\n>\n> - Warning 1\n> With multiline\n>\n> - Warning 2\n\n") in summary - - assert ("> [!CAUTION]\n>\n> Error 1\n\n") in summary + assert ("> [!WARNING]\n>\n> - Warning 1\n> With multiline\n>\n> - Warning 2\n>\n>") in summary + assert ( + "> Linter warnings for `tests/linter_test.sql`:\n> - mock_linter_rule: Linter warning\n>" + ) in summary + assert ("> [!CAUTION]\n>\n> - Error 1\n>\n>") in summary + assert ( + "> Linter **errors** for `tests/linter_test.sql`:\n> - mock_linter_rule: Linter error\n>" + ) in summary def test_get_pr_environment_summary_includes_warnings_and_errors( @@ -679,24 +709,39 @@ def test_get_pr_environment_summary_includes_warnings_and_errors( controller._console.log_warning("Warning 1") controller._console.log_error("Error 1") + add_linter_violations(controller) # completed with no exception triggers a SUCCESS conclusion and only shows warnings success_summary = controller.get_pr_environment_summary( conclusion=GithubCheckConclusion.SUCCESS ) - assert "> [!WARNING]\n>\n> Warning 1\n" in success_summary - assert "> [!CAUTION]\n>\n> Error 1" not in success_summary + assert "> [!WARNING]\n>\n> - Warning 1\n" in success_summary + assert ( + "> Linter warnings for `tests/linter_test.sql`:\n> - mock_linter_rule: Linter warning\n" + in success_summary + ) + assert "Error 1" not in success_summary + assert "mock_linter_rule: Linter error" not in success_summary # since they got consumed in the previous call controller._console.log_warning("Warning 1") controller._console.log_error("Error 1") + add_linter_violations(controller) # completed with an exception triggers a FAILED conclusion and shows errors error_summary = controller.get_pr_environment_summary( conclusion=GithubCheckConclusion.FAILURE, exception=SQLMeshError("Something broke") ) - assert "> [!WARNING]\n>\n> Warning 1\n" in error_summary - assert "> [!CAUTION]\n>\n> Error 1" in error_summary + assert "> [!WARNING]\n>\n> - Warning 1\n>\n" in error_summary + assert ( + "> Linter warnings for `tests/linter_test.sql`:\n> - mock_linter_rule: Linter warning\n" + in error_summary + ) + assert "[!CAUTION]\n>
\n>\n> - Error 1\n>\n" in error_summary + assert ( + "> Linter **errors** for `tests/linter_test.sql`:\n> - mock_linter_rule: Linter error\n" + in error_summary + ) def test_pr_comment_deploy_indicator_includes_command_namespace( From 71f3eb7f0c4cd52ed1a7144acd6424ca6078205f Mon Sep 17 00:00:00 2001 From: Tori Wei <41123940+toriwei@users.noreply.github.com> Date: Fri, 26 Sep 2025 09:16:28 -0700 Subject: [PATCH 021/173] fix: prevent duplicate audit names (#5431) --- sqlmesh/dbt/basemodel.py | 2 +- sqlmesh/dbt/loader.py | 3 ++- sqlmesh/dbt/test.py | 4 ++++ tests/core/integration/test_dbt.py | 2 +- tests/dbt/test_model.py | 10 +++++----- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/sqlmesh/dbt/basemodel.py b/sqlmesh/dbt/basemodel.py index 4637bbf91c..4dcf44a0af 100644 --- a/sqlmesh/dbt/basemodel.py +++ b/sqlmesh/dbt/basemodel.py @@ -305,7 +305,7 @@ def sqlmesh_model_kwargs( jinja_macros.add_globals(self._model_jinja_context(model_context, dependencies)) model_kwargs = { - "audits": [(test.name, {}) for test in self.tests], + "audits": [(test.canonical_name, {}) for test in self.tests], "column_descriptions": column_descriptions_to_sqlmesh(self.columns) or None, "depends_on": { model.canonical_name(context) for model in model_context.refs.values() diff --git a/sqlmesh/dbt/loader.py b/sqlmesh/dbt/loader.py index f7d97e74c8..eb117a3e40 100644 --- a/sqlmesh/dbt/loader.py +++ b/sqlmesh/dbt/loader.py @@ -172,7 +172,8 @@ def _load_audits( for test in package.tests.values(): logger.debug("Converting '%s' to sqlmesh format", test.name) try: - audits[test.name] = test.to_sqlmesh(package_context) + audits[test.canonical_name] = test.to_sqlmesh(package_context) + except BaseMissingReferenceError as e: ref_type = "model" if isinstance(e, MissingModelError) else "source" logger.warning( diff --git a/sqlmesh/dbt/test.py b/sqlmesh/dbt/test.py index 747c9d469c..1bd8a8e6e2 100644 --- a/sqlmesh/dbt/test.py +++ b/sqlmesh/dbt/test.py @@ -109,6 +109,10 @@ def _validate_severity(cls, v: t.Union[Severity, str]) -> Severity: def _lowercase_name(cls, v: str) -> str: return v.lower() + @property + def canonical_name(self) -> str: + return f"{self.package_name}.{self.name}" if self.package_name else self.name + @property def is_standalone(self) -> bool: # A test is standalone if: diff --git a/tests/core/integration/test_dbt.py b/tests/core/integration/test_dbt.py index 5e600899dd..6f23acb97e 100644 --- a/tests/core/integration/test_dbt.py +++ b/tests/core/integration/test_dbt.py @@ -48,7 +48,7 @@ def test_dbt_is_incremental_table_is_missing(sushi_test_dbt_context: Context): model = context.get_model("sushi.waiter_revenue_by_day_v2") model = model.copy(update={"kind": IncrementalUnmanagedKind(), "start": "2023-01-01"}) context.upsert_model(model) - context._standalone_audits["test_top_waiters"].start = "2023-01-01" + context._standalone_audits["sushi.test_top_waiters"].start = "2023-01-01" context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) diff --git a/tests/dbt/test_model.py b/tests/dbt/test_model.py index a64b29e89d..d212872cb7 100644 --- a/tests/dbt/test_model.py +++ b/tests/dbt/test_model.py @@ -190,23 +190,23 @@ def test_manifest_filters_standalone_tests_from_models( # Should only have "not_null" test, not the "relationships" test model1_audit_names = [audit[0] for audit in model1_snapshot.model.audits] assert len(model1_audit_names) == 1 - assert model1_audit_names[0] == "not_null_model1_id" + assert model1_audit_names[0] == "local.not_null_model1_id" # Verify 
model2 has its non-standalone test model2_audit_names = [audit[0] for audit in model2_snapshot.model.audits] assert len(model2_audit_names) == 1 - assert model2_audit_names[0] == "not_null_model2_id" + assert model2_audit_names[0] == "local.not_null_model2_id" # Verify the standalone test (relationships) exists as a StandaloneAudit all_non_standalone_audits = [name for name in context._audits] assert sorted(all_non_standalone_audits) == [ - "not_null_model1_id", - "not_null_model2_id", + "local.not_null_model1_id", + "local.not_null_model2_id", ] standalone_audits = [name for name in context._standalone_audits] assert len(standalone_audits) == 1 - assert standalone_audits[0] == "relationships_model1_id__id__ref_model2_" + assert standalone_audits[0] == "local.relationships_model1_id__id__ref_model2_" plan_builder = context.plan_builder() dag = plan_builder._build_dag() From 08950c8e9e2dbd73853aa5d46c34dfb11320b36f Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Fri, 26 Sep 2025 13:44:40 -0700 Subject: [PATCH 022/173] chore!: bump SQLGlot to v27.19.0 (#5446) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 35376dd095..9b192d6a78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "requests", "rich[jupyter]", "ruamel.yaml", - "sqlglot[rs]~=27.18.0", + "sqlglot[rs]~=27.19.0", "tenacity", "time-machine", "json-stream" From 0bf202c471ee0a3989383cc5d261ff4a41b5adf1 Mon Sep 17 00:00:00 2001 From: Tomasz Zorawik <67728999+xardasos@users.noreply.github.com> Date: Sat, 27 Sep 2025 01:25:36 +0200 Subject: [PATCH 023/173] Feat!: Categorize indirect MV changes as breaking for seamless version switching (#5374) --- sqlmesh/core/plan/builder.py | 8 ++ tests/core/test_plan.py | 142 ++++++++++++++++++++++++++++++++++- 2 files changed, 149 insertions(+), 1 deletion(-) diff --git a/sqlmesh/core/plan/builder.py b/sqlmesh/core/plan/builder.py index 2eb4c54aeb..7d753cc330 100644 --- a/sqlmesh/core/plan/builder.py +++ b/sqlmesh/core/plan/builder.py @@ -680,6 +680,14 @@ def _categorize_snapshot( if mode == AutoCategorizationMode.FULL: snapshot.categorize_as(SnapshotChangeCategory.BREAKING, forward_only) elif self._context_diff.indirectly_modified(snapshot.name): + if snapshot.is_materialized_view and not forward_only: + # We categorize changes as breaking to allow for instantaneous switches in a virtual layer. + # Otherwise, there might be a potentially long downtime during MVs recreation. + # In the case of forward-only changes this optimization is not applicable because we want to continue + # using the same (existing) table version. 
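+                # For example: an upstream column change that indirectly affects a
+                # materialized view now builds the new MV as a separate physical
+                # object up front, so promotion amounts to an instant view swap
+                # instead of waiting for the current MV to be rebuilt in place.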
+ snapshot.categorize_as(SnapshotChangeCategory.INDIRECT_BREAKING, forward_only) + return + all_upstream_forward_only = set() all_upstream_categories = set() direct_parent_categories = set() diff --git a/tests/core/test_plan.py b/tests/core/test_plan.py index 40967f1fbe..4b330c376f 100644 --- a/tests/core/test_plan.py +++ b/tests/core/test_plan.py @@ -26,7 +26,7 @@ SqlModel, ModelKindName, ) -from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange +from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange, ViewKind from sqlmesh.core.model.seed import Seed from sqlmesh.core.plan import Plan, PlanBuilder, SnapshotIntervals from sqlmesh.core.snapshot import ( @@ -4162,3 +4162,143 @@ def test_plan_ignore_cron_flag(make_snapshot): ], ) ] + + +def test_indirect_change_to_materialized_view_is_breaking(make_snapshot): + snapshot_a_old = make_snapshot( + SqlModel( + name="a", + query=parse_one("select 1 as col_a"), + kind=ViewKind(materialized=True), + ) + ) + snapshot_a_old.categorize_as(SnapshotChangeCategory.BREAKING) + + snapshot_b_old = make_snapshot( + SqlModel( + name="b", + query=parse_one("select col_a from a"), + kind=ViewKind(materialized=True), + ), + nodes={'"a"': snapshot_a_old.model}, + ) + snapshot_b_old.categorize_as(SnapshotChangeCategory.BREAKING) + + snapshot_a_new = make_snapshot( + SqlModel( + name="a", + query=parse_one("select 1 as col_a, 2 as col_b"), + kind=ViewKind(materialized=True), + ) + ) + + snapshot_a_new.previous_versions = snapshot_a_old.all_versions + + snapshot_b_new = make_snapshot( + snapshot_b_old.model, + nodes={'"a"': snapshot_a_new.model}, + ) + snapshot_b_new.previous_versions = snapshot_b_old.all_versions + + context_diff = ContextDiff( + environment="test_environment", + is_new_environment=True, + is_unfinalized_environment=False, + normalize_environment_name=True, + create_from="prod", + create_from_env_exists=True, + added=set(), + removed_snapshots={}, + modified_snapshots={ + snapshot_a_new.name: (snapshot_a_new, snapshot_a_old), + snapshot_b_new.name: (snapshot_b_new, snapshot_b_old), + }, + snapshots={ + snapshot_a_new.snapshot_id: snapshot_a_new, + snapshot_b_new.snapshot_id: snapshot_b_new, + }, + new_snapshots={ + snapshot_a_new.snapshot_id: snapshot_a_new, + snapshot_b_new.snapshot_id: snapshot_b_new, + }, + previous_plan_id=None, + previously_promoted_snapshot_ids=set(), + previous_finalized_snapshots=None, + previous_gateway_managed_virtual_layer=False, + gateway_managed_virtual_layer=False, + environment_statements=[], + ) + + PlanBuilder(context_diff, forward_only=False).build() + + assert snapshot_b_new.change_category == SnapshotChangeCategory.INDIRECT_BREAKING + + +def test_forward_only_indirect_change_to_materialized_view(make_snapshot): + snapshot_a_old = make_snapshot( + SqlModel( + name="a", + query=parse_one("select 1 as col_a"), + ) + ) + snapshot_a_old.categorize_as(SnapshotChangeCategory.BREAKING) + + snapshot_b_old = make_snapshot( + SqlModel( + name="b", + query=parse_one("select col_a from a"), + kind=ViewKind(materialized=True), + ), + nodes={'"a"': snapshot_a_old.model}, + ) + snapshot_b_old.categorize_as(SnapshotChangeCategory.BREAKING) + + snapshot_a_new = make_snapshot( + SqlModel( + name="a", + query=parse_one("select 1 as col_a, 2 as col_b"), + ) + ) + + snapshot_a_new.previous_versions = snapshot_a_old.all_versions + + snapshot_b_new = make_snapshot( + snapshot_b_old.model, + nodes={'"a"': snapshot_a_new.model}, + ) + snapshot_b_new.previous_versions = snapshot_b_old.all_versions + + 
context_diff = ContextDiff( + environment="test_environment", + is_new_environment=True, + is_unfinalized_environment=False, + normalize_environment_name=True, + create_from="prod", + create_from_env_exists=True, + added=set(), + removed_snapshots={}, + modified_snapshots={ + snapshot_a_new.name: (snapshot_a_new, snapshot_a_old), + snapshot_b_new.name: (snapshot_b_new, snapshot_b_old), + }, + snapshots={ + snapshot_a_new.snapshot_id: snapshot_a_new, + snapshot_b_new.snapshot_id: snapshot_b_new, + }, + new_snapshots={ + snapshot_a_new.snapshot_id: snapshot_a_new, + snapshot_b_new.snapshot_id: snapshot_b_new, + }, + previous_plan_id=None, + previously_promoted_snapshot_ids=set(), + previous_finalized_snapshots=None, + previous_gateway_managed_virtual_layer=False, + gateway_managed_virtual_layer=False, + environment_statements=[], + ) + + PlanBuilder(context_diff, forward_only=True).build() + + # Forward-only indirect changes to MVs should not always be classified as indirect breaking. + # Instead, we want to preserve the standard categorization. + assert snapshot_b_new.change_category == SnapshotChangeCategory.INDIRECT_NON_BREAKING From b29e71bc1a2c82cebd362d497af8ba0236f5e3b1 Mon Sep 17 00:00:00 2001 From: Vaggelis Danias Date: Mon, 29 Sep 2025 17:34:26 +0300 Subject: [PATCH 024/173] Feat!: Skip model evaluation if upstream external model(s) have not changed (#5277) --- sqlmesh/core/context.py | 7 + sqlmesh/core/engine_adapter/base.py | 4 + sqlmesh/core/engine_adapter/bigquery.py | 22 +++ sqlmesh/core/engine_adapter/snowflake.py | 16 ++ sqlmesh/core/plan/evaluator.py | 1 + sqlmesh/core/scheduler.py | 15 +- sqlmesh/core/signal.py | 44 ++++- sqlmesh/core/snapshot/definition.py | 31 ++++ sqlmesh/core/state_sync/base.py | 4 + sqlmesh/core/state_sync/db/facade.py | 3 +- sqlmesh/core/state_sync/db/interval.py | 22 ++- .../v0099_add_last_altered_to_intervals.py | 27 +++ .../integration/test_integration.py | 158 +++++++++++++++++- 13 files changed, 346 insertions(+), 8 deletions(-) create mode 100644 sqlmesh/migrations/v0099_add_last_altered_to_intervals.py diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index e3feb1e14b..e31a04fe81 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -274,6 +274,7 @@ def __init__( deployability_index: t.Optional[DeployabilityIndex] = None, default_dialect: t.Optional[str] = None, default_catalog: t.Optional[str] = None, + is_restatement: t.Optional[bool] = None, variables: t.Optional[t.Dict[str, t.Any]] = None, blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None, ): @@ -284,6 +285,7 @@ def __init__( self._default_dialect = default_dialect self._variables = variables or {} self._blueprint_variables = blueprint_variables or {} + self._is_restatement = is_restatement @property def default_dialect(self) -> t.Optional[str]: @@ -308,6 +310,10 @@ def gateway(self) -> t.Optional[str]: """Returns the gateway name.""" return self.var(c.GATEWAY) + @property + def is_restatement(self) -> t.Optional[bool]: + return self._is_restatement + def var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]: """Returns a variable value.""" return self._variables.get(var_name.lower(), default) @@ -328,6 +334,7 @@ def with_variables( self.deployability_index, self._default_dialect, self._default_catalog, + self._is_restatement, variables=variables, blueprint_variables=blueprint_variables, ) diff --git a/sqlmesh/core/engine_adapter/base.py b/sqlmesh/core/engine_adapter/base.py index 47e6a4260c..68c6404081 100644 --- 
a/sqlmesh/core/engine_adapter/base.py +++ b/sqlmesh/core/engine_adapter/base.py @@ -119,6 +119,7 @@ class EngineAdapter: MAX_IDENTIFIER_LENGTH: t.Optional[int] = None ATTACH_CORRELATION_ID = True SUPPORTS_QUERY_EXECUTION_TRACKING = False + SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS = False def __init__( self, @@ -2927,6 +2928,9 @@ def _check_identifier_length(self, expression: exp.Expression) -> None: f"Identifier name '{name}' (length {name_length}) exceeds {self.dialect.capitalize()}'s max identifier limit of {self.MAX_IDENTIFIER_LENGTH} characters" ) + def get_table_last_modified_ts(self, table_names: t.List[TableName]) -> t.List[int]: + raise NotImplementedError() + class EngineAdapterWithIndexSupport(EngineAdapter): SUPPORTS_INDEXES = True diff --git a/sqlmesh/core/engine_adapter/bigquery.py b/sqlmesh/core/engine_adapter/bigquery.py index 0dfa2325e8..26abad9ebc 100644 --- a/sqlmesh/core/engine_adapter/bigquery.py +++ b/sqlmesh/core/engine_adapter/bigquery.py @@ -755,6 +755,28 @@ def table_exists(self, table_name: TableName) -> bool: except NotFound: return False + def get_table_last_modified_ts(self, table_names: t.List[TableName]) -> t.List[int]: + from sqlmesh.utils.date import to_timestamp + + datasets_to_tables: t.DefaultDict[str, t.List[str]] = defaultdict(list) + for table_name in table_names: + table = exp.to_table(table_name) + datasets_to_tables[table.db].append(table.name) + + results = [] + + for dataset, tables in datasets_to_tables.items(): + query = ( + f"SELECT TIMESTAMP_MILLIS(last_modified_time) FROM `{dataset}.__TABLES__` WHERE " + ) + for i, table_name in enumerate(tables): + query += f"TABLE_ID = '{table_name}'" + if i < len(tables) - 1: + query += " OR " + results.extend(self.fetchall(query)) + + return [to_timestamp(row[0]) for row in results] + def _get_table(self, table_name: TableName) -> BigQueryTable: """ Returns a BigQueryTable object for the given table name. 
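Since `__TABLES__` is a per-dataset metadata view, the lookup above has to be grouped by dataset. For two tables in one dataset, the assembled query would look roughly as follows (dataset and table names are illustrative):

    # Illustrative only: mirrors the string the adapter builds per dataset.
    dataset, tables = "raw", ["customers", "orders"]
    query = f"SELECT TIMESTAMP_MILLIS(last_modified_time) FROM `{dataset}.__TABLES__` WHERE "
    query += " OR ".join(f"TABLE_ID = '{t}'" for t in tables)
    # SELECT TIMESTAMP_MILLIS(last_modified_time) FROM `raw.__TABLES__`
    #   WHERE TABLE_ID = 'customers' OR TABLE_ID = 'orders'
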
diff --git a/sqlmesh/core/engine_adapter/snowflake.py b/sqlmesh/core/engine_adapter/snowflake.py index 9c27b45115..1554589779 100644 --- a/sqlmesh/core/engine_adapter/snowflake.py +++ b/sqlmesh/core/engine_adapter/snowflake.py @@ -54,6 +54,7 @@ class SnowflakeEngineAdapter(GetCurrentCatalogFromFunctionMixin, ClusteredByMixi SUPPORTS_MANAGED_MODELS = True CURRENT_CATALOG_EXPRESSION = exp.func("current_database") SUPPORTS_CREATE_DROP_CATALOG = True + SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS = True SUPPORTED_DROP_CASCADE_OBJECT_KINDS = ["DATABASE", "SCHEMA", "TABLE"] SCHEMA_DIFFER_KWARGS = { "parameterized_type_defaults": { @@ -669,3 +670,18 @@ def close(self) -> t.Any: self._connection_pool.set_attribute(self.SNOWPARK, None) return super().close() + + def get_table_last_modified_ts(self, table_names: t.List[TableName]) -> t.List[int]: + from sqlmesh.utils.date import to_timestamp + + num_tables = len(table_names) + + query = "SELECT LAST_ALTERED FROM INFORMATION_SCHEMA.TABLES WHERE" + for i, table_name in enumerate(table_names): + table = exp.to_table(table_name) + query += f"""(TABLE_NAME = '{table.name}' AND TABLE_SCHEMA = '{table.db}' AND TABLE_CATALOG = '{table.catalog}')""" + if i < num_tables - 1: + query += " OR " + + result = self.fetchall(query) + return [to_timestamp(row[0]) for row in result] diff --git a/sqlmesh/core/plan/evaluator.py b/sqlmesh/core/plan/evaluator.py index 03ecb770bf..f2f432a97e 100644 --- a/sqlmesh/core/plan/evaluator.py +++ b/sqlmesh/core/plan/evaluator.py @@ -258,6 +258,7 @@ def visit_backfill_stage(self, stage: stages.BackfillStage, plan: EvaluatablePla allow_additive_snapshots=plan.allow_additive_models, selected_snapshot_ids=stage.selected_snapshot_ids, selected_models=plan.selected_models, + is_restatement=bool(plan.restatements), ) if errors: raise PlanError("Plan application failed.") diff --git a/sqlmesh/core/scheduler.py b/sqlmesh/core/scheduler.py index af4d72b165..7e27205fc6 100644 --- a/sqlmesh/core/scheduler.py +++ b/sqlmesh/core/scheduler.py @@ -251,7 +251,9 @@ def evaluate( **kwargs, ) - self.state_sync.add_interval(snapshot, start, end, is_dev=not is_deployable) + self.state_sync.add_interval( + snapshot, start, end, is_dev=not is_deployable, last_altered_ts=now_timestamp() + ) return audit_results def run( @@ -335,6 +337,7 @@ def batch_intervals( deployability_index: t.Optional[DeployabilityIndex], environment_naming_info: EnvironmentNamingInfo, dag: t.Optional[DAG[SnapshotId]] = None, + is_restatement: bool = False, ) -> t.Dict[Snapshot, Intervals]: dag = dag or snapshots_to_dag(merged_intervals) @@ -367,6 +370,7 @@ def batch_intervals( deployability_index, default_dialect=adapter.dialect, default_catalog=self.default_catalog, + is_restatement=is_restatement, ) intervals = self._check_ready_intervals( @@ -422,6 +426,7 @@ def run_merged_intervals( run_environment_statements: bool = False, audit_only: bool = False, auto_restatement_triggers: t.Dict[SnapshotId, t.List[SnapshotId]] = {}, + is_restatement: bool = False, ) -> t.Tuple[t.List[NodeExecutionFailedError[SchedulingUnit]], t.List[SchedulingUnit]]: """Runs precomputed batches of missing intervals. 
@@ -455,9 +460,12 @@ def run_merged_intervals( snapshot_dag = full_dag.subdag(*selected_snapshot_ids_set) batched_intervals = self.batch_intervals( - merged_intervals, deployability_index, environment_naming_info, dag=snapshot_dag + merged_intervals, + deployability_index, + environment_naming_info, + dag=snapshot_dag, + is_restatement=is_restatement, ) - self.console.start_evaluation_progress( batched_intervals, environment_naming_info, @@ -956,6 +964,7 @@ def _check_ready_intervals( python_env=signals.python_env, dialect=snapshot.model.dialect, path=snapshot.model._path, + snapshot=snapshot, kwargs=kwargs, ) except SQLMeshError as e: diff --git a/sqlmesh/core/signal.py b/sqlmesh/core/signal.py index d9ee670922..52e6c59c8d 100644 --- a/sqlmesh/core/signal.py +++ b/sqlmesh/core/signal.py @@ -1,8 +1,14 @@ from __future__ import annotations - +import typing as t from sqlmesh.utils import UniqueKeyDict, registry_decorator +if t.TYPE_CHECKING: + from sqlmesh.core.context import ExecutionContext + from sqlmesh.core.snapshot.definition import Snapshot + from sqlmesh.utils.date import DatetimeRanges + from sqlmesh.core.snapshot.definition import DeployabilityIndex + class signal(registry_decorator): """Specifies a function which intervals are ready from a list of scheduled intervals. @@ -33,3 +39,39 @@ class signal(registry_decorator): SignalRegistry = UniqueKeyDict[str, signal] + + +@signal() +def freshness(batch: DatetimeRanges, snapshot: Snapshot, context: ExecutionContext) -> bool: + adapter = context.engine_adapter + if context.is_restatement or not adapter.SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS: + return True + + deployability_index = context.deployability_index or DeployabilityIndex.all_deployable() + + last_altered_ts = ( + snapshot.last_altered_ts + if deployability_index.is_deployable(snapshot) + else snapshot.dev_last_altered_ts + ) + if not last_altered_ts: + return True + + parent_snapshots = {context.snapshots[p.name] for p in snapshot.parents} + if len(parent_snapshots) != len(snapshot.node.depends_on) or not all( + p.is_external for p in parent_snapshots + ): + # The mismatch can happen if, e.g., an external model is not registered in the project + return True + + # Finding new data means that the upstream dependencies have been altered + # since the last time the model was evaluated + upstream_dep_has_new_data = any( + upstream_last_altered_ts > last_altered_ts + for upstream_last_altered_ts in adapter.get_table_last_modified_ts( + [p.name for p in parent_snapshots] + ) + ) + + # Returning True is a no-op; returning False nullifies the batch so the model will not be evaluated. 
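+    # Example: with the model's table last built at ts=100, an external parent
+    # reporting last_modified=120 means new upstream data exists and the batch
+    # runs; if every parent reports <= 100, the batch is nullified and skipped.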
+ return upstream_dep_has_new_data diff --git a/sqlmesh/core/snapshot/definition.py b/sqlmesh/core/snapshot/definition.py index 600d84fe83..23ab0b21db 100644 --- a/sqlmesh/core/snapshot/definition.py +++ b/sqlmesh/core/snapshot/definition.py @@ -185,6 +185,8 @@ class SnapshotIntervals(PydanticModel): intervals: Intervals = [] dev_intervals: Intervals = [] pending_restatement_intervals: Intervals = [] + last_altered_ts: t.Optional[int] = None + dev_last_altered_ts: t.Optional[int] = None @property def snapshot_id(self) -> t.Optional[SnapshotId]: @@ -205,6 +207,12 @@ def add_dev_interval(self, start: int, end: int) -> None: def add_pending_restatement_interval(self, start: int, end: int) -> None: self._add_interval(start, end, "pending_restatement_intervals") + def update_last_altered_ts(self, last_altered_ts: t.Optional[int]) -> None: + self._update_last_altered_ts(last_altered_ts, "last_altered_ts") + + def update_dev_last_altered_ts(self, last_altered_ts: t.Optional[int]) -> None: + self._update_last_altered_ts(last_altered_ts, "dev_last_altered_ts") + def remove_interval(self, start: int, end: int) -> None: self._remove_interval(start, end, "intervals") @@ -224,6 +232,13 @@ def _add_interval(self, start: int, end: int, interval_attr: str) -> None: target_intervals = merge_intervals([*target_intervals, (start, end)]) setattr(self, interval_attr, target_intervals) + def _update_last_altered_ts( + self, last_altered_ts: t.Optional[int], last_altered_attr: str + ) -> None: + if last_altered_ts: + existing_last_altered_ts = getattr(self, last_altered_attr) + setattr(self, last_altered_attr, max(existing_last_altered_ts or 0, last_altered_ts)) + def _remove_interval(self, start: int, end: int, interval_attr: str) -> None: target_intervals = getattr(self, interval_attr) target_intervals = remove_interval(target_intervals, start, end) @@ -713,6 +728,10 @@ class Snapshot(PydanticModel, SnapshotInfoMixin): dev_table_suffix: str = "dev" table_naming_convention: TableNamingConvention = TableNamingConvention.default forward_only: bool = False + # Physical table last modified timestamp, not to be confused with the "updated_ts" field + # which is for the snapshot record itself + last_altered_ts: t.Optional[int] = None + dev_last_altered_ts: t.Optional[int] = None @field_validator("ttl") @classmethod @@ -751,6 +770,7 @@ def hydrate_with_intervals_by_version( ) for interval in snapshot_intervals: snapshot.merge_intervals(interval) + result.append(snapshot) return result @@ -957,12 +977,20 @@ def merge_intervals(self, other: t.Union[Snapshot, SnapshotIntervals]) -> None: if not apply_effective_from or end <= effective_from_ts: self.add_interval(start, end) + if other.last_altered_ts: + self.last_altered_ts = max(self.last_altered_ts or 0, other.last_altered_ts) + if self.dev_version == other.dev_version: # Merge dev intervals if the dev versions match which would mean # that this and the other snapshot are pointing to the same dev table. 
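+            # The dev timestamp below is merged like the prod one above: taking
+            # the max ensures that combining interval records never moves the
+            # recorded table freshness backwards.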
for start, end in other.dev_intervals: self.add_interval(start, end, is_dev=True) + if other.dev_last_altered_ts: + self.dev_last_altered_ts = max( + self.dev_last_altered_ts or 0, other.dev_last_altered_ts + ) + self.pending_restatement_intervals = merge_intervals( [*self.pending_restatement_intervals, *other.pending_restatement_intervals] ) @@ -1081,6 +1109,7 @@ def check_ready_intervals( python_env=signals.python_env, dialect=self.model.dialect, path=self.model._path, + snapshot=self, kwargs=kwargs, ) except SQLMeshError as e: @@ -2421,6 +2450,7 @@ def check_ready_intervals( python_env: t.Dict[str, Executable], dialect: DialectType = None, path: t.Optional[Path] = None, + snapshot: t.Optional[Snapshot] = None, kwargs: t.Optional[t.Dict] = None, ) -> Intervals: checked_intervals: Intervals = [] @@ -2436,6 +2466,7 @@ def check_ready_intervals( provided_args=(batch,), provided_kwargs=(kwargs or {}), context=context, + snapshot=snapshot, ) except Exception as ex: raise SignalEvalError(format_evaluated_code_exception(ex, python_env)) diff --git a/sqlmesh/core/state_sync/base.py b/sqlmesh/core/state_sync/base.py index 450d6f7408..2f8a68dd4a 100644 --- a/sqlmesh/core/state_sync/base.py +++ b/sqlmesh/core/state_sync/base.py @@ -496,6 +496,7 @@ def add_interval( start: TimeLike, end: TimeLike, is_dev: bool = False, + last_altered_ts: t.Optional[int] = None, ) -> None: """Add an interval to a snapshot and sync it to the store. @@ -504,6 +505,7 @@ def add_interval( start: The start of the interval to add. end: The end of the interval to add. is_dev: Indicates whether the given interval is being added while in development mode + last_altered_ts: The timestamp of the last modification of the physical table """ start_ts, end_ts = snapshot.inclusive_exclusive(start, end, strict=False, expand=False) if not snapshot.version: @@ -516,6 +518,8 @@ def add_interval( dev_version=snapshot.dev_version, intervals=intervals if not is_dev else [], dev_intervals=intervals if is_dev else [], + last_altered_ts=last_altered_ts if not is_dev else None, + dev_last_altered_ts=last_altered_ts if is_dev else None, ) self.add_snapshots_intervals([snapshot_intervals]) diff --git a/sqlmesh/core/state_sync/db/facade.py b/sqlmesh/core/state_sync/db/facade.py index 29fc9f1740..3c23ef339c 100644 --- a/sqlmesh/core/state_sync/db/facade.py +++ b/sqlmesh/core/state_sync/db/facade.py @@ -381,8 +381,9 @@ def add_interval( start: TimeLike, end: TimeLike, is_dev: bool = False, + last_altered_ts: t.Optional[int] = None, ) -> None: - super().add_interval(snapshot, start, end, is_dev) + super().add_interval(snapshot, start, end, is_dev, last_altered_ts) @transactional() def add_snapshots_intervals(self, snapshots_intervals: t.Sequence[SnapshotIntervals]) -> None: diff --git a/sqlmesh/core/state_sync/db/interval.py b/sqlmesh/core/state_sync/db/interval.py index b15ad2d57b..8ccdc58fa0 100644 --- a/sqlmesh/core/state_sync/db/interval.py +++ b/sqlmesh/core/state_sync/db/interval.py @@ -60,6 +60,7 @@ def __init__( "is_removed": exp.DataType.build("boolean"), "is_compacted": exp.DataType.build("boolean"), "is_pending_restatement": exp.DataType.build("boolean"), + "last_altered_ts": exp.DataType.build("bigint"), } def add_snapshots_intervals(self, snapshots_intervals: t.Sequence[SnapshotIntervals]) -> None: @@ -215,13 +216,23 @@ def _push_snapshot_intervals( for start_ts, end_ts in snapshot.intervals: new_intervals.append( _interval_to_df( - snapshot, start_ts, end_ts, is_dev=False, is_compacted=is_compacted + snapshot, + start_ts, + end_ts, + 
is_dev=False, + is_compacted=is_compacted, + last_altered_ts=snapshot.last_altered_ts, ) ) for start_ts, end_ts in snapshot.dev_intervals: new_intervals.append( _interval_to_df( - snapshot, start_ts, end_ts, is_dev=True, is_compacted=is_compacted + snapshot, + start_ts, + end_ts, + is_dev=True, + is_compacted=is_compacted, + last_altered_ts=snapshot.dev_last_altered_ts, ) ) @@ -236,6 +247,7 @@ def _push_snapshot_intervals( is_dev=False, is_compacted=is_compacted, is_pending_restatement=True, + last_altered_ts=snapshot.last_altered_ts, ) ) @@ -284,6 +296,7 @@ def _get_snapshot_intervals( is_dev, is_removed, is_pending_restatement, + last_altered_ts, ) in rows: interval_ids.add(interval_id) merge_key = (name, version, dev_version, identifier) @@ -318,8 +331,10 @@ def _get_snapshot_intervals( else: if is_dev: intervals[merge_key].add_dev_interval(start, end) + intervals[merge_key].update_dev_last_altered_ts(last_altered_ts) else: intervals[merge_key].add_interval(start, end) + intervals[merge_key].update_last_altered_ts(last_altered_ts) # Remove all pending restatement intervals recorded before the current interval has been added intervals[ pending_restatement_interval_merge_key @@ -340,6 +355,7 @@ def _get_snapshot_intervals_query(self, uncompacted_only: bool) -> exp.Select: "is_dev", "is_removed", "is_pending_restatement", + "last_altered_ts", ) .from_(exp.to_table(self.intervals_table).as_("intervals")) .order_by( @@ -460,6 +476,7 @@ def _interval_to_df( is_removed: bool = False, is_compacted: bool = False, is_pending_restatement: bool = False, + last_altered_ts: t.Optional[int] = None, ) -> t.Dict[str, t.Any]: return { "id": random_id(), @@ -474,4 +491,5 @@ "is_removed": is_removed, "is_compacted": is_compacted, "is_pending_restatement": is_pending_restatement, + "last_altered_ts": last_altered_ts, } diff --git a/sqlmesh/migrations/v0099_add_last_altered_to_intervals.py b/sqlmesh/migrations/v0099_add_last_altered_to_intervals.py new file mode 100644 index 0000000000..1a119a338d --- /dev/null +++ b/sqlmesh/migrations/v0099_add_last_altered_to_intervals.py @@ -0,0 +1,27 @@ +"""Add the last_altered_ts column to the intervals table.""" + +from sqlglot import exp + + +def migrate_schemas(state_sync, **kwargs): # type: ignore + engine_adapter = state_sync.engine_adapter + schema = state_sync.schema + intervals_table = "_intervals" + if schema: + intervals_table = f"{schema}.{intervals_table}" + + alter_table_exp = exp.Alter( + this=exp.to_table(intervals_table), + kind="TABLE", + actions=[ + exp.ColumnDef( + this=exp.to_column("last_altered_ts"), + kind=exp.DataType.build("BIGINT", dialect=engine_adapter.dialect), + ) + ], + ) + engine_adapter.execute(alter_table_exp) + + +def migrate_rows(state_sync, **kwargs): # type: ignore + pass diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index 5a708e1e4c..5190d26e98 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -10,6 +10,11 @@ from unittest import mock from unittest.mock import patch import logging +from IPython.utils.capture import capture_output + + +import time_machine +from pytest_mock.plugin import MockerFixture import numpy as np # noqa: TID253 import pandas as pd # noqa: TID253 @@ -45,6 +50,7 @@ TEST_SCHEMA, wait_until, ) +from tests.utils.test_helpers import use_terminal_console DATA_TYPE = exp.DataType.Type VARCHAR_100 = exp.DataType.build("varchar(100)") @@
-3774,7 +3780,7 @@ def _set_config(gateway: str, config: Config) -> None: ] -def test_materialized_view_evaluation(ctx: TestContext, mocker: MockerFixture): +def test_materialized_view_evaluation(ctx: TestContext): adapter = ctx.engine_adapter dialect = ctx.dialect @@ -3834,3 +3840,153 @@ def _assert_mview_value(value: int): assert any("Replacing view" in call[0][0] for call in mock_logger.call_args_list) _assert_mview_value(value=2) + + +@use_terminal_console +def test_external_model_freshness(ctx: TestContext, mocker: MockerFixture, tmp_path: pathlib.Path): + adapter = ctx.engine_adapter + if not adapter.SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS: + pytest.skip("This test only runs for engines that support metadata-based freshness") + + def _assert_snapshot_last_altered_ts( + context: Context, + snapshot_id: str, + last_altered_ts: datetime, + dev_last_altered_ts: t.Optional[datetime] = None, + ): + from sqlmesh.utils.date import to_datetime + + snapshot = context.state_sync.get_snapshots([snapshot_id])[snapshot_id] + + assert to_datetime(snapshot.last_altered_ts).replace( + microsecond=0 + ) == last_altered_ts.replace(microsecond=0) + + if dev_last_altered_ts: + assert to_datetime(snapshot.dev_last_altered_ts).replace( + microsecond=0 + ) == dev_last_altered_ts.replace(microsecond=0) + + import sqlmesh + + spy = mocker.spy(sqlmesh.core.snapshot.evaluator.SnapshotEvaluator, "evaluate") + + def _assert_model_evaluation(lambda_func, was_evaluated, day_delta=0): + spy.reset_mock() + timestamp = now(minute_floor=False) + timedelta(days=day_delta) + with time_machine.travel(timestamp, tick=False): + with capture_output() as output: + plan_or_run_result = lambda_func() + + evaluate_function_called = spy.call_count == 1 + signal_was_checked = "Checking signals for" in output.stdout + + assert signal_was_checked + if was_evaluated: + assert "All ready" in output.stdout + assert evaluate_function_called + else: + assert "None ready" in output.stdout + assert not evaluate_function_called + + return timestamp, plan_or_run_result + + # Create & initialize schema + schema = ctx.add_test_suffix(TEST_SCHEMA) + ctx._schemas.append(schema) + adapter.create_schema(schema) + + # Create & initialize external models + external_table1 = f"{schema}.external_table1" + external_table2 = f"{schema}.external_table2" + + external_models_yaml = tmp_path / "external_models.yaml" + external_models_yaml.write_text(f""" +- name: {external_table1} + columns: + col1: int + +- name: {external_table2} + columns: + col2: int +""") + + adapter.execute( + f"CREATE TABLE {external_table1} AS (SELECT 1 AS col1)", quote_identifiers=False + ) + adapter.execute( + f"CREATE TABLE {external_table2} AS (SELECT 2 AS col2)", quote_identifiers=False + ) + + # Create model that depends on external models + model_name = f"{schema}.new_model" + model_path = tmp_path / "models" / "new_model.sql" + (tmp_path / "models").mkdir(parents=True, exist_ok=True) + model_path.write_text(f""" + MODEL ( + name {model_name}, + start '2024-01-01', + kind FULL, + signals ( + freshness(), + ) + ); + + SELECT col1 * col2 AS col FROM {external_table1}, {external_table2}; + """) + + # Initialize context + def _set_config(gateway: str, config: Config) -> None: + config.model_defaults.dialect = ctx.dialect + + context = ctx.create_context(path=tmp_path, config_mutator=_set_config) + + # Case 1: Model is evaluated for the first plan + prod_plan_ts, prod_plan = _assert_model_evaluation( + lambda: context.plan(auto_apply=True, no_prompts=True), was_evaluated=True + ) 
+ + prod_snapshot_id = next(iter(prod_plan.context_diff.new_snapshots)) + _assert_snapshot_last_altered_ts(context, prod_snapshot_id, last_altered_ts=prod_plan_ts) + + # Case 2: Model is NOT evaluated on run if external models are not fresh + _assert_model_evaluation(lambda: context.run(), was_evaluated=False, day_delta=1) + + # Case 3: Differentiate last_altered_ts between snapshots with shared version + # For instance, creating a FORWARD_ONLY change in dev (reusing the version but creating a dev preview) should not cause + # any side effects to the prod snapshot's last_altered_ts hydration + model_path.write_text(model_path.read_text().replace("col1 * col2", "col1 + col2")) + context.load() + dev_plan_ts = now(minute_floor=False) + timedelta(days=2) + with time_machine.travel(dev_plan_ts, tick=False): + dev_plan = context.plan( + environment="dev", forward_only=True, auto_apply=True, no_prompts=True + ) + + context.state_sync.clear_cache() + dev_snapshot_id = next(iter(dev_plan.context_diff.new_snapshots)) + _assert_snapshot_last_altered_ts( + context, + dev_snapshot_id, + last_altered_ts=prod_plan_ts, + dev_last_altered_ts=dev_plan_ts, + ) + _assert_snapshot_last_altered_ts(context, prod_snapshot_id, last_altered_ts=prod_plan_ts) + + # Case 4: Model is evaluated on run if any external model is fresh + adapter.execute(f"INSERT INTO {external_table2} (col2) VALUES (3)", quote_identifiers=False) + _assert_model_evaluation(lambda: context.run(), was_evaluated=True, day_delta=2) + + # Case 5: Model is evaluated if changed (case 3) even if the external model is not fresh + model_path.write_text(model_path.read_text().replace("col1 + col2", "col1 * col2 * 5")) + context.load() + _assert_model_evaluation( + lambda: context.plan(auto_apply=True, no_prompts=True), was_evaluated=True, day_delta=3 + ) + + # Case 6: Model is evaluated on a restatement plan even if the external model is not fresh + _assert_model_evaluation( + lambda: context.plan(restate_models=[model_name], auto_apply=True, no_prompts=True), + was_evaluated=True, + day_delta=4, + ) From 89e74bc4bd87e7ba76c3112c9896bb0d080a7ec1 Mon Sep 17 00:00:00 2001 From: Ben <9087625+benfdking@users.noreply.github.com> Date: Mon, 29 Sep 2025 18:04:29 +0100 Subject: [PATCH 025/173] chore: add dbt unit tests to fixture (#5451) --- .../fixtures/dbt/sushi_test/models/schema.yml | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/fixtures/dbt/sushi_test/models/schema.yml b/tests/fixtures/dbt/sushi_test/models/schema.yml index 87a201c418..21985f19ff 100644 --- a/tests/fixtures/dbt/sushi_test/models/schema.yml +++ b/tests/fixtures/dbt/sushi_test/models/schema.yml @@ -1,6 +1,55 @@ version: 2 models: + - name: simple_model_a + description: A simple model for testing + columns: + - name: a + data_type: int + unit_tests: + - name: test_simple_model_a_outputs_one + description: Test that simple_model_a outputs 1 as column a + model: simple_model_a + given: [] # No input models needed + expect: + format: csv + rows: | + a + 1 + - name: simple_model_b + description: Model that references simple_model_a + columns: + - name: a + data_type: int + unit_tests: + - name: test_simple_model_b_with_mock_input + description: Test simple_model_b with mocked simple_model_a input + model: simple_model_b + given: + - input: ref('simple_model_a') + format: csv + rows: | + a + 10 + 20 + 30 + expect: + format: csv + rows: | + a + 10 + 20 + 30 + - name: test_simple_model_b_with_sql_input + description: Test simple_model_b with SQL-defined 
input data + model: simple_model_b + given: + - input: ref('simple_model_a') + format: sql + rows: SELECT 42 AS a + expect: + format: sql + rows: SELECT 42 AS a - name: top_waiters description: description of top waiters columns: From e4cb63e6524ae1abe9c881299e290d6241af2849 Mon Sep 17 00:00:00 2001 From: Tori Wei <41123940+toriwei@users.noreply.github.com> Date: Mon, 29 Sep 2025 11:46:26 -0700 Subject: [PATCH 026/173] fix: normalize audit canonical name (#5448) --- sqlmesh/dbt/test.py | 2 +- tests/dbt/test_config.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/sqlmesh/dbt/test.py b/sqlmesh/dbt/test.py index 1bd8a8e6e2..7d8a369068 100644 --- a/sqlmesh/dbt/test.py +++ b/sqlmesh/dbt/test.py @@ -111,7 +111,7 @@ def _lowercase_name(cls, v: str) -> str: @property def canonical_name(self) -> str: - return f"{self.package_name}.{self.name}" if self.package_name else self.name + return f"{self.package_name}.{self.name}".lower() if self.package_name else self.name @property def is_standalone(self) -> bool: diff --git a/tests/dbt/test_config.py b/tests/dbt/test_config.py index c484b8e126..b3ee0c422a 100644 --- a/tests/dbt/test_config.py +++ b/tests/dbt/test_config.py @@ -245,6 +245,31 @@ def test_test_to_sqlmesh_fields(): assert audit.dialect == "bigquery" +def test_test_config_canonical_name(): + test_config_upper_case_package = TestConfig( + name="foo_test", + package_name="TEST_PACKAGE", + sql="SELECT 1", + ) + + assert test_config_upper_case_package.canonical_name == "test_package.foo_test" + + test_config_mixed_case_package = TestConfig( + name="Bar_Test", + package_name="MixedCase_Package", + sql="SELECT 1", + ) + + assert test_config_mixed_case_package.canonical_name == "mixedcase_package.bar_test" + + test_config_no_package = TestConfig( + name="foo_bar_test", + sql="SELECT 1", + ) + + assert test_config_no_package.canonical_name == "foo_bar_test" + + def test_singular_test_to_standalone_audit(dbt_dummy_postgres_config: PostgresConfig): sql = "SELECT * FROM FOO.BAR WHERE cost > 100" test_config = TestConfig( From 622fd16d4f23e1d5926025e15c00e03f35c9b5f1 Mon Sep 17 00:00:00 2001 From: Trey Spiller <1831878+treysp@users.noreply.github.com> Date: Mon, 29 Sep 2025 14:55:44 -0500 Subject: [PATCH 027/173] Chore: add fields to dbt jinja target variable (#5449) --- sqlmesh/dbt/target.py | 17 +++++++++++++-- tests/dbt/test_transformation.py | 36 +++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/sqlmesh/dbt/target.py b/sqlmesh/dbt/target.py index f5fd119027..c53c818933 100644 --- a/sqlmesh/dbt/target.py +++ b/sqlmesh/dbt/target.py @@ -45,11 +45,24 @@ # We only serialize a subset of fields in order to avoid persisting sensitive information SERIALIZABLE_FIELDS = { - "type", + # core "name", - "database", "schema_", + "type", + "threads", + # snowflake + "database", "warehouse", + "user", + "role", + "account", + # postgres/redshift + "dbname", + "host", + "port", + # bigquery + "project", + "dataset", } SCHEMA_DIFFER_OVERRIDES = { diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py index 29651f9140..b9db817d29 100644 --- a/tests/dbt/test_transformation.py +++ b/tests/dbt/test_transformation.py @@ -1023,8 +1023,41 @@ def test_target_jinja(sushi_test_project: Project): user="user", password="password", warehouse="warehouse", + role="role", + threads=1, ) + assert context.render("{{ target.threads }}") == "1" + assert context.render("{{ target.database }}") 
== "test" assert context.render("{{ target.warehouse }}") == "warehouse" + assert context.render("{{ target.user }}") == "user" + assert context.render("{{ target.role }}") == "role" + assert context.render("{{ target.account }}") == "account" + + context = DbtContext() + context._target = PostgresConfig( + name="target", + schema="test", + database="test", + dbname="test", + host="host", + port=5432, + user="user", + password="password", + ) + assert context.render("{{ target.dbname }}") == "test" + assert context.render("{{ target.host }}") == "host" + assert context.render("{{ target.port }}") == "5432" + + context = DbtContext() + context._target = BigQueryConfig( + name="target", + schema="test", + database="test", + project="project", + dataset="dataset", + ) + assert context.render("{{ target.project }}") == "project" + assert context.render("{{ target.dataset }}") == "dataset" @pytest.mark.xdist_group("dbt_manifest") @@ -1965,8 +1998,9 @@ def test_snapshot_json_payload(): assert snapshot_json["node"]["jinja_macros"]["global_objs"]["target"] == { "type": "duckdb", "name": "in_memory", - "schema": "sushi", "database": "memory", + "schema": "sushi", + "threads": 1, "target_name": "in_memory", } From a90db9a4e322bc4bc4e55311815d0b1eb8d71b79 Mon Sep 17 00:00:00 2001 From: Trey Spiller <1831878+treysp@users.noreply.github.com> Date: Mon, 29 Sep 2025 17:19:14 -0500 Subject: [PATCH 028/173] Fix: pop 'begin' model config for all model kinds (#5453) --- sqlmesh/dbt/model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sqlmesh/dbt/model.py b/sqlmesh/dbt/model.py index 9386b0b4f8..7d9a7e3348 100644 --- a/sqlmesh/dbt/model.py +++ b/sqlmesh/dbt/model.py @@ -656,6 +656,9 @@ def to_sqlmesh( # Set allow_partials to True for dbt models to preserve the original semantics. allow_partials = True + # pop begin for all models so we don't pass it through for non-incremental materializations + # (happens if model config is microbatch but project config overrides) + begin = model_kwargs.pop("begin", None) if kind.is_incremental: if self.batch_size and isinstance(self.batch_size, str): if "interval_unit" in model_kwargs: @@ -665,7 +668,7 @@ def to_sqlmesh( else: model_kwargs["interval_unit"] = self.batch_size self.batch_size = None - if begin := model_kwargs.pop("begin", None): + if begin: if "start" in model_kwargs: get_console().log_warning( f"Both 'begin' and 'start' are set for model '{self.canonical_name(context)}'. 'start' will be used." 
From cbcb6d2c3759b2fdc0e193153926eaf1c9f052e1 Mon Sep 17 00:00:00 2001 From: Tori Wei <41123940+toriwei@users.noreply.github.com> Date: Tue, 30 Sep 2025 08:03:04 -0700 Subject: [PATCH 029/173] fix: ignore partition_by field for ephemeral models (#5454) --- sqlmesh/dbt/model.py | 5 +++-- tests/dbt/test_transformation.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sqlmesh/dbt/model.py b/sqlmesh/dbt/model.py index 7d9a7e3348..f6cb81f30f 100644 --- a/sqlmesh/dbt/model.py +++ b/sqlmesh/dbt/model.py @@ -510,10 +510,11 @@ def to_sqlmesh( physical_properties: t.Dict[str, t.Any] = {} if self.partition_by: - if isinstance(kind, ViewKind): + if isinstance(kind, (ViewKind, EmbeddedKind)): logger.warning( - "Ignoring partition_by config for model '%s'; partition_by is not supported for views.", + "Ignoring partition_by config for model '%s'; partition_by is not supported for %s.", self.name, + "views" if isinstance(kind, ViewKind) else "ephemeral models", ) else: partitioned_by = [] diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py index b9db817d29..a640d620b7 100644 --- a/tests/dbt/test_transformation.py +++ b/tests/dbt/test_transformation.py @@ -1728,6 +1728,18 @@ def test_partition_by(sushi_test_project: Project): ) assert model_config.to_sqlmesh(context).partitioned_by == [] + model_config = ModelConfig( + name="model", + alias="model", + schema="test", + package_name="package", + materialized=Materialization.EPHEMERAL.value, + unique_key="ds", + partition_by={"field": "ds", "granularity": "month"}, + sql="""SELECT 1 AS one, ds FROM foo""", + ) + assert model_config.to_sqlmesh(context).partitioned_by == [] + @pytest.mark.xdist_group("dbt_manifest") def test_partition_by_none(sushi_test_project: Project): From a255e1710f9a12aa8edc8aae8f8f0ba02af50777 Mon Sep 17 00:00:00 2001 From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com> Date: Tue, 30 Sep 2025 19:06:08 +0300 Subject: [PATCH 030/173] Feat(dbt): Add support for dbt custom materializations (#5435) Co-authored-by: Iaroslav Zeigerman --- sqlmesh/core/model/kind.py | 47 ++ sqlmesh/core/snapshot/evaluator.py | 205 ++++- sqlmesh/dbt/basemodel.py | 14 + sqlmesh/dbt/builtin.py | 1 + sqlmesh/dbt/manifest.py | 70 +- sqlmesh/dbt/model.py | 27 + sqlmesh/dbt/package.py | 17 +- sqlmesh/utils/jinja.py | 1 + tests/dbt/test_custom_materializations.py | 721 ++++++++++++++++++ tests/dbt/test_manifest.py | 2 +- tests/dbt/test_model.py | 11 + tests/dbt/test_transformation.py | 125 ++- .../materializations/custom_incremental.sql | 61 ++ .../models/custom_incremental_model.sql | 20 + .../models/custom_incremental_with_filter.sql | 9 + tests/fixtures/dbt/sushi_test/profiles.yml | 1 + 16 files changed, 1321 insertions(+), 11 deletions(-) create mode 100644 tests/dbt/test_custom_materializations.py create mode 100644 tests/fixtures/dbt/sushi_test/macros/materializations/custom_incremental.sql create mode 100644 tests/fixtures/dbt/sushi_test/models/custom_incremental_model.sql create mode 100644 tests/fixtures/dbt/sushi_test/models/custom_incremental_with_filter.sql diff --git a/sqlmesh/core/model/kind.py b/sqlmesh/core/model/kind.py index dc5f533c21..7b8e88ac17 100644 --- a/sqlmesh/core/model/kind.py +++ b/sqlmesh/core/model/kind.py @@ -119,6 +119,10 @@ def is_custom(self) -> bool: def is_managed(self) -> bool: return self.model_kind_name == ModelKindName.MANAGED + @property + def is_dbt_custom(self) -> bool: + return self.model_kind_name == 
ModelKindName.DBT_CUSTOM + @property def is_symbolic(self) -> bool: """A symbolic model is one that doesn't execute at all.""" @@ -170,6 +174,7 @@ class ModelKindName(str, ModelKindMixin, Enum): EXTERNAL = "EXTERNAL" CUSTOM = "CUSTOM" MANAGED = "MANAGED" + DBT_CUSTOM = "DBT_CUSTOM" @property def model_kind_name(self) -> t.Optional[ModelKindName]: @@ -887,6 +892,46 @@ def supports_python_models(self) -> bool: return False +class DbtCustomKind(_ModelKind): + name: t.Literal[ModelKindName.DBT_CUSTOM] = ModelKindName.DBT_CUSTOM + materialization: str + adapter: str = "default" + definition: str + dialect: t.Optional[str] = Field(None, validate_default=True) + + _dialect_validator = kind_dialect_validator + + @field_validator("materialization", "adapter", "definition", mode="before") + @classmethod + def _validate_fields(cls, v: t.Any) -> str: + return validate_string(v) + + @property + def data_hash_values(self) -> t.List[t.Optional[str]]: + return [ + *super().data_hash_values, + self.materialization, + self.definition, + self.adapter, + self.dialect, + ] + + def to_expression( + self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + ) -> d.ModelKind: + return super().to_expression( + expressions=[ + *(expressions or []), + *_properties( + { + "materialization": exp.Literal.string(self.materialization), + "adapter": exp.Literal.string(self.adapter), + } + ), + ], + ) + + class EmbeddedKind(_ModelKind): name: t.Literal[ModelKindName.EMBEDDED] = ModelKindName.EMBEDDED @@ -992,6 +1037,7 @@ def to_expression( SCDType2ByColumnKind, CustomKind, ManagedKind, + DbtCustomKind, ], Field(discriminator="name"), ] @@ -1011,6 +1057,7 @@ def to_expression( ModelKindName.SCD_TYPE_2_BY_COLUMN: SCDType2ByColumnKind, ModelKindName.CUSTOM: CustomKind, ModelKindName.MANAGED: ManagedKind, + ModelKindName.DBT_CUSTOM: DbtCustomKind, } diff --git a/sqlmesh/core/snapshot/evaluator.py b/sqlmesh/core/snapshot/evaluator.py index 70cc31b0a4..4ac87199c6 100644 --- a/sqlmesh/core/snapshot/evaluator.py +++ b/sqlmesh/core/snapshot/evaluator.py @@ -50,7 +50,7 @@ ViewKind, CustomKind, ) -from sqlmesh.core.model.kind import _Incremental +from sqlmesh.core.model.kind import _Incremental, DbtCustomKind from sqlmesh.utils import CompletionStatus, columns_to_types_all_known from sqlmesh.core.schema_diff import ( has_drop_alteration, @@ -67,7 +67,7 @@ SnapshotTableCleanupTask, ) from sqlmesh.core.snapshot.execution_tracker import QueryExecutionTracker -from sqlmesh.utils import random_id, CorrelationId +from sqlmesh.utils import random_id, CorrelationId, AttributeDict from sqlmesh.utils.concurrency import ( concurrent_apply_to_snapshots, concurrent_apply_to_values, @@ -83,6 +83,7 @@ format_additive_change_msg, AdditiveChangeError, ) +from sqlmesh.utils.jinja import MacroReturnVal if sys.version_info >= (3, 12): from importlib import metadata @@ -747,7 +748,10 @@ def _evaluate_snapshot( adapter.transaction(), adapter.session(snapshot.model.render_session_properties(**render_statements_kwargs)), ): - adapter.execute(model.render_pre_statements(**render_statements_kwargs)) + evaluation_strategy = _evaluation_strategy(snapshot, adapter) + evaluation_strategy.run_pre_statements( + snapshot=snapshot, render_kwargs=render_statements_kwargs + ) if not target_table_exists or (model.is_seed and not snapshot.intervals): # Only create the empty table if the columns were provided explicitly by the user @@ -817,7 +821,9 @@ def _evaluate_snapshot( batch_index=batch_index, ) - 
adapter.execute(model.render_post_statements(**render_statements_kwargs)) + evaluation_strategy.run_post_statements( + snapshot=snapshot, render_kwargs=render_statements_kwargs + ) return wap_id @@ -1433,7 +1439,9 @@ def _execute_create( "table_mapping": {snapshot.name: table_name}, } if run_pre_post_statements: - adapter.execute(snapshot.model.render_pre_statements(**create_render_kwargs)) + evaluation_strategy.run_pre_statements( + snapshot=snapshot, render_kwargs=create_render_kwargs + ) evaluation_strategy.create( table_name=table_name, model=snapshot.model, @@ -1445,7 +1453,9 @@ def _execute_create( physical_properties=rendered_physical_properties, ) if run_pre_post_statements: - adapter.execute(snapshot.model.render_post_statements(**create_render_kwargs)) + evaluation_strategy.run_post_statements( + snapshot=snapshot, render_kwargs=create_render_kwargs + ) def _can_clone(self, snapshot: Snapshot, deployability_index: DeployabilityIndex) -> bool: adapter = self.get_adapter(snapshot.model.gateway) @@ -1456,6 +1466,7 @@ def _can_clone(self, snapshot: Snapshot, deployability_index: DeployabilityIndex and adapter.SUPPORTS_CLONING # managed models cannot have their schema mutated because theyre based on queries, so clone + alter wont work and not snapshot.is_managed + and not snapshot.is_dbt_custom and not deployability_index.is_deployable(snapshot) # If the deployable table is missing we can't clone it and adapter.table_exists(snapshot.table_name()) @@ -1540,6 +1551,19 @@ def _evaluation_strategy(snapshot: SnapshotInfoLike, adapter: EngineAdapter) -> klass = ViewStrategy elif snapshot.is_scd_type_2: klass = SCDType2Strategy + elif snapshot.is_dbt_custom: + if hasattr(snapshot, "model") and isinstance( + (model_kind := snapshot.model.kind), DbtCustomKind + ): + return DbtCustomMaterializationStrategy( + adapter=adapter, + materialization_name=model_kind.materialization, + materialization_template=model_kind.definition, + ) + + raise SQLMeshError( + f"Expected DbtCustomKind for dbt custom materialization in model '{snapshot.name}'" + ) elif snapshot.is_custom: if snapshot.custom_materialization is None: raise SQLMeshError( @@ -1679,6 +1703,24 @@ def demote(self, view_name: str, **kwargs: t.Any) -> None: view_name: The name of the target view in the virtual layer. """ + @abc.abstractmethod + def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + """Executes the snapshot's pre statements. + + Args: + snapshot: The target snapshot. + render_kwargs: Additional key-value arguments to pass when rendering the statements. + """ + + @abc.abstractmethod + def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + """Executes the snapshot's post statements. + + Args: + snapshot: The target snapshot. + render_kwargs: Additional key-value arguments to pass when rendering the statements. 
+ """ + class SymbolicStrategy(EvaluationStrategy): def insert( @@ -1740,6 +1782,12 @@ def promote( def demote(self, view_name: str, **kwargs: t.Any) -> None: pass + def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Dict[str, t.Any]) -> None: + pass + + def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Dict[str, t.Any]) -> None: + pass + class EmbeddedStrategy(SymbolicStrategy): def promote( @@ -1787,6 +1835,12 @@ def demote(self, view_name: str, **kwargs: t.Any) -> None: logger.info("Dropping view '%s'", view_name) self.adapter.drop_view(view_name, cascade=False) + def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + self.adapter.execute(snapshot.model.render_pre_statements(**render_kwargs)) + + def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + self.adapter.execute(snapshot.model.render_post_statements(**render_kwargs)) + class MaterializableStrategy(PromotableStrategy, abc.ABC): def create( @@ -2593,6 +2647,145 @@ def get_custom_materialization_type_or_raise( raise SQLMeshError(f"Custom materialization '{name}' not present in the Python environment") +class DbtCustomMaterializationStrategy(MaterializableStrategy): + def __init__( + self, + adapter: EngineAdapter, + materialization_name: str, + materialization_template: str, + ): + super().__init__(adapter) + self.materialization_name = materialization_name + self.materialization_template = materialization_template + + def create( + self, + table_name: str, + model: Model, + is_table_deployable: bool, + render_kwargs: t.Dict[str, t.Any], + **kwargs: t.Any, + ) -> None: + original_query = model.render_query_or_raise(**render_kwargs) + self._execute_materialization( + table_name=table_name, + query_or_df=original_query.limit(0), + model=model, + is_first_insert=True, + render_kwargs=render_kwargs, + create_only=True, + **kwargs, + ) + + def insert( + self, + table_name: str, + query_or_df: QueryOrDF, + model: Model, + is_first_insert: bool, + render_kwargs: t.Dict[str, t.Any], + **kwargs: t.Any, + ) -> None: + self._execute_materialization( + table_name=table_name, + query_or_df=query_or_df, + model=model, + is_first_insert=is_first_insert, + render_kwargs=render_kwargs, + **kwargs, + ) + + def append( + self, + table_name: str, + query_or_df: QueryOrDF, + model: Model, + render_kwargs: t.Dict[str, t.Any], + **kwargs: t.Any, + ) -> None: + return self.insert( + table_name, + query_or_df, + model, + is_first_insert=False, + render_kwargs=render_kwargs, + **kwargs, + ) + + def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + # in dbt custom materialisations it's up to the user when to run the pre hooks + pass + + def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + # in dbt custom materialisations it's up to the user when to run the post hooks + pass + + def _execute_materialization( + self, + table_name: str, + query_or_df: QueryOrDF, + model: Model, + is_first_insert: bool, + render_kwargs: t.Dict[str, t.Any], + create_only: bool = False, + **kwargs: t.Any, + ) -> None: + jinja_macros = model.jinja_macros + + # For vdes we need to use the table, since we don't know the schema/table at parse time + parts = exp.to_table(table_name, dialect=self.adapter.dialect) + + existing_globals = jinja_macros.global_objs + relation_info = existing_globals.get("this") + if isinstance(relation_info, dict): + relation_info["database"] = parts.catalog + relation_info["identifier"] = parts.name + 
relation_info["name"] = parts.name + + jinja_globals = { + **existing_globals, + "this": relation_info, + "database": parts.catalog, + "schema": parts.db, + "identifier": parts.name, + "target": existing_globals.get("target", {"type": self.adapter.dialect}), + "execution_dt": kwargs.get("execution_time"), + "engine_adapter": self.adapter, + "sql": str(query_or_df), + "is_first_insert": is_first_insert, + "create_only": create_only, + # FIXME: Add support for transaction=False + "pre_hooks": [ + AttributeDict({"sql": s.this.this, "transaction": True}) + for s in model.pre_statements + ], + "post_hooks": [ + AttributeDict({"sql": s.this.this, "transaction": True}) + for s in model.post_statements + ], + "model_instance": model, + **kwargs, + } + + try: + jinja_env = jinja_macros.build_environment(**jinja_globals) + template = jinja_env.from_string(self.materialization_template) + + try: + template.render() + except MacroReturnVal as ret: + # this is a successful return from a macro call (dbt uses this list of Relations to update their relation cache) + returned_relations = ret.value.get("relations", []) + logger.info( + f"Materialization {self.materialization_name} returned relations: {returned_relations}" + ) + + except Exception as e: + raise SQLMeshError( + f"Failed to execute dbt materialization '{self.materialization_name}': {e}" + ) from e + + class EngineManagedStrategy(MaterializableStrategy): def create( self, diff --git a/sqlmesh/dbt/basemodel.py b/sqlmesh/dbt/basemodel.py index 4dcf44a0af..0b75955129 100644 --- a/sqlmesh/dbt/basemodel.py +++ b/sqlmesh/dbt/basemodel.py @@ -57,6 +57,12 @@ class Materialization(str, Enum): # Snowflake, https://docs.getdbt.com/reference/resource-configs/snowflake-configs#dynamic-tables DYNAMIC_TABLE = "dynamic_table" + CUSTOM = "custom" + + @classmethod + def _missing_(cls, value): # type: ignore + return cls.CUSTOM + class SnapshotStrategy(str, Enum): """DBT snapshot strategies""" @@ -295,6 +301,14 @@ def sqlmesh_model_kwargs( # precisely which variables are referenced in the model dependencies.variables |= set(context.variables) + if ( + getattr(self, "model_materialization", None) == Materialization.CUSTOM + and hasattr(self, "_get_custom_materialization") + and (custom_mat := self._get_custom_materialization(context)) + ): + # include custom materialization dependencies as they might use macros + dependencies = dependencies.union(custom_mat.dependencies) + model_dialect = self.dialect(context) model_context = context.context_for_dependencies( dependencies.union(self.tests_ref_source_dependencies) diff --git a/sqlmesh/dbt/builtin.py b/sqlmesh/dbt/builtin.py index e284c11797..b8180bc011 100644 --- a/sqlmesh/dbt/builtin.py +++ b/sqlmesh/dbt/builtin.py @@ -546,6 +546,7 @@ def create_builtin_globals( "statement": sql_execution.statement, "graph": adapter.graph, "selected_resources": list(jinja_globals.get("selected_models") or []), + "write": lambda input: None, # We don't support writing yet } ) diff --git a/sqlmesh/dbt/manifest.py b/sqlmesh/dbt/manifest.py index 0e33569888..17c5e91700 100644 --- a/sqlmesh/dbt/manifest.py +++ b/sqlmesh/dbt/manifest.py @@ -47,7 +47,7 @@ from sqlmesh.dbt.builtin import BUILTIN_FILTERS, BUILTIN_GLOBALS, OVERRIDDEN_MACROS from sqlmesh.dbt.common import Dependencies from sqlmesh.dbt.model import ModelConfig -from sqlmesh.dbt.package import HookConfig, MacroConfig +from sqlmesh.dbt.package import HookConfig, MacroConfig, MaterializationConfig from sqlmesh.dbt.seed import SeedConfig from sqlmesh.dbt.source import 
SourceConfig from sqlmesh.dbt.target import TargetConfig @@ -75,6 +75,7 @@ SourceConfigs = t.Dict[str, SourceConfig] MacroConfigs = t.Dict[str, MacroConfig] HookConfigs = t.Dict[str, HookConfig] +MaterializationConfigs = t.Dict[str, MaterializationConfig] IGNORED_PACKAGES = {"elementary"} @@ -135,6 +136,7 @@ def __init__( self._on_run_start_per_package: t.Dict[str, HookConfigs] = defaultdict(dict) self._on_run_end_per_package: t.Dict[str, HookConfigs] = defaultdict(dict) + self._materializations: MaterializationConfigs = {} def tests(self, package_name: t.Optional[str] = None) -> TestConfigs: self._load_all() @@ -164,6 +166,10 @@ def on_run_end(self, package_name: t.Optional[str] = None) -> HookConfigs: self._load_all() return self._on_run_end_per_package[package_name or self._project_name] + def materializations(self) -> MaterializationConfigs: + self._load_all() + return self._materializations + @property def all_macros(self) -> t.Dict[str, t.Dict[str, MacroInfo]]: self._load_all() @@ -213,6 +219,7 @@ def _load_all(self) -> None: self._calls = {k: (v, False) for k, v in (self._call_cache.get("") or {}).items()} self._load_macros() + self._load_materializations() self._load_sources() self._load_tests() self._load_models_and_seeds() @@ -250,11 +257,14 @@ def _load_sources(self) -> None: def _load_macros(self) -> None: for macro in self._manifest.macros.values(): + if macro.name.startswith("materialization_"): + continue + if macro.name.startswith("test_"): macro.macro_sql = _convert_jinja_test_to_macro(macro.macro_sql) dependencies = Dependencies(macros=_macro_references(self._manifest, macro)) - if not macro.name.startswith("materialization_") and not macro.name.startswith("test_"): + if not macro.name.startswith("test_"): dependencies = dependencies.union( self._extra_dependencies(macro.macro_sql, macro.package_name) ) @@ -281,6 +291,32 @@ def _load_macros(self) -> None: if pos > 0 and name[pos + 2 :] in adapter_macro_names: macro_config.info.is_top_level = True + def _load_materializations(self) -> None: + for macro in self._manifest.macros.values(): + if macro.name.startswith("materialization_"): + # Extract name and adapter ( "materialization_{name}_{adapter}" or "materialization_{name}_default") + name_parts = macro.name.split("_") + if len(name_parts) >= 3: + mat_name = "_".join(name_parts[1:-1]) + adapter = name_parts[-1] + + dependencies = Dependencies(macros=_macro_references(self._manifest, macro)) + macro.macro_sql = _strip_jinja_materialization_tags(macro.macro_sql) + dependencies = dependencies.union( + self._extra_dependencies(macro.macro_sql, macro.package_name) + ) + + materialization_config = MaterializationConfig( + name=mat_name, + adapter=adapter, + definition=macro.macro_sql, + dependencies=dependencies, + path=Path(macro.original_file_path), + ) + + key = f"{mat_name}_{adapter}" + self._materializations[key] = materialization_config + def _load_tests(self) -> None: for node in self._manifest.nodes.values(): if node.resource_type != "test": @@ -359,6 +395,12 @@ def _load_models_and_seeds(self) -> None: dependencies = dependencies.union( self._extra_dependencies(sql, node.package_name, track_all_model_attrs=True) ) + for hook in [*node_config.get("pre-hook", []), *node_config.get("post-hook", [])]: + dependencies = dependencies.union( + self._extra_dependencies( + hook["sql"], node.package_name, track_all_model_attrs=True + ) + ) dependencies = dependencies.union( self._flatten_dependencies_from_macros(dependencies.macros, node.package_name) ) @@ -732,3 +774,27 @@ def 
_convert_jinja_test_to_macro(test_jinja: str) -> str: macro = macro_tag + test_jinja[match.span()[-1] :] return re.sub(ENDTEST_REGEX, lambda m: m.group(0).replace("endtest", "endmacro"), macro) + + +def _strip_jinja_materialization_tags(materialization_jinja: str) -> str: + MATERIALIZATION_TAG_REGEX = r"\s*{%-?\s*materialization\s+[^%]*%}\s*\n?" + ENDMATERIALIZATION_REGEX = r"{%-?\s*endmaterialization\s*-?%}\s*\n?" + + if not re.match(MATERIALIZATION_TAG_REGEX, materialization_jinja): + return materialization_jinja + + materialization_jinja = re.sub( + MATERIALIZATION_TAG_REGEX, + "", + materialization_jinja, + flags=re.IGNORECASE, + ) + + materialization_jinja = re.sub( + ENDMATERIALIZATION_REGEX, + "", + materialization_jinja, + flags=re.IGNORECASE, + ) + + return materialization_jinja.strip() diff --git a/sqlmesh/dbt/model.py b/sqlmesh/dbt/model.py index f6cb81f30f..f47283d06e 100644 --- a/sqlmesh/dbt/model.py +++ b/sqlmesh/dbt/model.py @@ -31,6 +31,7 @@ OnAdditiveChange, on_destructive_change_validator, on_additive_change_validator, + DbtCustomKind, ) from sqlmesh.dbt.basemodel import BaseModelConfig, Materialization, SnapshotStrategy from sqlmesh.dbt.common import SqlStr, sql_str_validator @@ -40,6 +41,7 @@ if t.TYPE_CHECKING: from sqlmesh.core.audit.definition import ModelAudit from sqlmesh.dbt.context import DbtContext + from sqlmesh.dbt.package import MaterializationConfig logger = logging.getLogger(__name__) @@ -444,6 +446,19 @@ def model_kind(self, context: DbtContext) -> ModelKind: if materialization == Materialization.DYNAMIC_TABLE: return ManagedKind() + if materialization == Materialization.CUSTOM: + if custom_materialization := self._get_custom_materialization(context): + return DbtCustomKind( + materialization=self.materialized, + adapter=custom_materialization.adapter, + dialect=self.dialect(context), + definition=custom_materialization.definition, + ) + + raise ConfigError( + f"Unknown materialization '{self.materialized}'. Custom materializations must be defined in your dbt project." 
+ ) + raise ConfigError(f"{materialization.value} materialization not supported.") def _big_query_partition_by_expr(self, context: DbtContext) -> exp.Expression: @@ -483,6 +498,18 @@ def _big_query_partition_by_expr(self, context: DbtContext) -> exp.Expression: dialect="bigquery", ) + def _get_custom_materialization(self, context: DbtContext) -> t.Optional[MaterializationConfig]: + materializations = context.manifest.materializations() + name, target_adapter = self.materialized, context.target.dialect + + adapter_specific_key = f"{name}_{target_adapter}" + default_key = f"{name}_default" + if adapter_specific_key in materializations: + return materializations[adapter_specific_key] + if default_key in materializations: + return materializations[default_key] + return None + @property def sqlmesh_config_fields(self) -> t.Set[str]: return super().sqlmesh_config_fields | { diff --git a/sqlmesh/dbt/package.py b/sqlmesh/dbt/package.py index 420cf3cb73..dbaa832c22 100644 --- a/sqlmesh/dbt/package.py +++ b/sqlmesh/dbt/package.py @@ -37,6 +37,16 @@ class HookConfig(PydanticModel): dependencies: Dependencies +class MaterializationConfig(PydanticModel): + """Class to contain custom materialization configuration.""" + + name: str + adapter: str + definition: str + dependencies: Dependencies + path: Path + + class Package(PydanticModel): """Class to contain package configuration""" @@ -47,6 +57,7 @@ class Package(PydanticModel): models: t.Dict[str, ModelConfig] variables: t.Dict[str, t.Any] macros: t.Dict[str, MacroConfig] + materializations: t.Dict[str, MaterializationConfig] on_run_start: t.Dict[str, HookConfig] on_run_end: t.Dict[str, HookConfig] files: t.Set[Path] @@ -94,6 +105,7 @@ def load(self, package_root: Path) -> Package: models = _fix_paths(self._context.manifest.models(package_name), package_root) seeds = _fix_paths(self._context.manifest.seeds(package_name), package_root) macros = _fix_paths(self._context.manifest.macros(package_name), package_root) + materializations = _fix_paths(self._context.manifest.materializations(), package_root) on_run_start = _fix_paths(self._context.manifest.on_run_start(package_name), package_root) on_run_end = _fix_paths(self._context.manifest.on_run_end(package_name), package_root) sources = self._context.manifest.sources(package_name) @@ -114,13 +126,16 @@ def load(self, package_root: Path) -> Package: seeds=seeds, variables=package_variables, macros=macros, + materializations=materializations, files=config_paths, on_run_start=on_run_start, on_run_end=on_run_end, ) -T = t.TypeVar("T", TestConfig, ModelConfig, MacroConfig, SeedConfig, HookConfig) +T = t.TypeVar( + "T", TestConfig, ModelConfig, MacroConfig, MaterializationConfig, SeedConfig, HookConfig +) def _fix_paths(configs: t.Dict[str, T], package_root: Path) -> t.Dict[str, T]: diff --git a/sqlmesh/utils/jinja.py b/sqlmesh/utils/jinja.py index 508c6dce2d..59e9f6dd2f 100644 --- a/sqlmesh/utils/jinja.py +++ b/sqlmesh/utils/jinja.py @@ -369,6 +369,7 @@ def build_environment(self, **kwargs: t.Any) -> Environment: context.update(builtin_globals) context.update(root_macros) context.update(package_macros) + context["render"] = lambda input: env.from_string(input).render() env.globals.update(context) env.filters.update(self._environment.filters) diff --git a/tests/dbt/test_custom_materializations.py b/tests/dbt/test_custom_materializations.py new file mode 100644 index 0000000000..9e7a94315c --- /dev/null +++ b/tests/dbt/test_custom_materializations.py @@ -0,0 +1,721 @@ +from __future__ import annotations + 
+import typing as t +from pathlib import Path + +import pytest + +from sqlmesh import Context +from sqlmesh.core.config import ModelDefaultsConfig +from sqlmesh.core.model.kind import DbtCustomKind +from sqlmesh.dbt.context import DbtContext +from sqlmesh.dbt.manifest import ManifestHelper +from sqlmesh.dbt.model import ModelConfig +from sqlmesh.dbt.profile import Profile +from sqlmesh.dbt.basemodel import Materialization + +pytestmark = pytest.mark.dbt + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_manifest_loading(): + project_path = Path("tests/fixtures/dbt/sushi_test") + profile = Profile.load(DbtContext(project_path)) + + helper = ManifestHelper( + project_path, + project_path, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + materializations = helper.materializations() + + # custom materialization should have been loaded from the manifest + assert "custom_incremental_default" in materializations + custom_incremental = materializations["custom_incremental_default"] + assert custom_incremental.name == "custom_incremental" + assert custom_incremental.adapter == "default" + assert "make_temp_relation(new_relation)" in custom_incremental.definition + assert "run_hooks(pre_hooks)" in custom_incremental.definition + assert " {{ return({'relations': [new_relation]}) }}" in custom_incremental.definition + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_model_config(): + project_path = Path("tests/fixtures/dbt/sushi_test") + profile = Profile.load(DbtContext(project_path)) + + helper = ManifestHelper( + project_path, + project_path, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + + models = helper.models() + + custom_model = models["custom_incremental_model"] + assert isinstance(custom_model, ModelConfig) + assert custom_model.materialized == "custom_incremental" + assert custom_model.model_materialization == Materialization.CUSTOM + + # pre and post hooks should also be handled in custom materializations + assert len(custom_model.pre_hook) == 2 + assert ( + custom_model.pre_hook[1].sql + == "CREATE TABLE IF NOT EXISTS hook_table (id INTEGER, length_col TEXT, updated_at TIMESTAMP)" + ) + assert len(custom_model.post_hook) == 2 + assert "COALESCE(MAX(id), 0)" in custom_model.post_hook[1].sql + + custom_filter_model = models["custom_incremental_with_filter"] + assert isinstance(custom_filter_model, ModelConfig) + assert custom_filter_model.materialized == "custom_incremental" + assert custom_filter_model.model_materialization == Materialization.CUSTOM + assert custom_filter_model.interval == "2 day" + assert custom_filter_model.time_column == "created_at" + + # verify also that the global hooks are inherited in the model without its own hooks + assert len(custom_filter_model.pre_hook) == 1 + assert len(custom_filter_model.post_hook) == 1 + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_model_kind(): + project_path = Path("tests/fixtures/dbt/sushi_test") + context = DbtContext(project_path) + profile = Profile.load(DbtContext(project_path)) + + helper = ManifestHelper( + project_path, + project_path, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + + context._target = profile.target + context._manifest = helper + models = helper.models() + + # custom materialization models get DbtCustomKind populated + custom_model = models["custom_incremental_model"] + kind = custom_model.model_kind(context) +
assert isinstance(kind, DbtCustomKind) + assert kind.materialization == "custom_incremental" + assert kind.adapter == "default" + assert "create_table_as" in kind.definition + + custom_filter_model = models["custom_incremental_with_filter"] + kind = custom_filter_model.model_kind(context) + assert isinstance(kind, DbtCustomKind) + assert kind.materialization == "custom_incremental" + assert kind.adapter == "default" + assert "run_hooks" in kind.definition + + # the DbtCustomKind shouldn't be set for normal strategies + regular_model = models["simple_model_a"] + regular_kind = regular_model.model_kind(context) + assert not isinstance(regular_kind, DbtCustomKind) + + # verify in sqlmesh as well + sqlmesh_context = Context( + paths=["tests/fixtures/dbt/sushi_test"], + config=None, + ) + + custom_incremental = sqlmesh_context.get_model("sushi.custom_incremental_model") + assert isinstance(custom_incremental.kind, DbtCustomKind) + assert custom_incremental.kind.materialization == "custom_incremental" + + custom_with_filter = sqlmesh_context.get_model("sushi.custom_incremental_with_filter") + assert isinstance(custom_with_filter.kind, DbtCustomKind) + assert custom_with_filter.kind.materialization == "custom_incremental" + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_dependencies(): + project_path = Path("tests/fixtures/dbt/sushi_test") + context = DbtContext(project_path) + profile = Profile.load(DbtContext(project_path)) + + helper = ManifestHelper( + project_path, + project_path, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + + context._target = profile.target + context._manifest = helper + models = helper.models() + + # custom materialization uses macros that should appear in dependencies + for model_name in ["custom_incremental_model", "custom_incremental_with_filter"]: + materialization_deps = models[model_name]._get_custom_materialization(context) + assert materialization_deps is not None + assert len(materialization_deps.dependencies.macros) > 0 + macro_names = [macro.name for macro in materialization_deps.dependencies.macros] + expected_macros = [ + "build_incremental_filter_sql", + "Relation", + "create_table_as", + "make_temp_relation", + "run_hooks", + "statement", + ] + assert any(macro in macro_names for macro in expected_macros) + + +@pytest.mark.xdist_group("dbt_manifest") +def test_adapter_specific_materialization_override(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + macros_dir = temp_project / "macros" / "materializations" + macros_dir.mkdir(parents=True, exist_ok=True) + + adapter_mat_content = """ +{%- materialization custom_adapter_test, default -%} + {%- set new_relation = api.Relation.create(database=database, schema=schema, identifier=identifier) -%} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + {%- call statement('main') -%} + CREATE TABLE {{ new_relation }} AS ( + SELECT 'default_adapter' as adapter_type, * FROM ({{ sql }}) AS subquery + ) + {%- endcall -%} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + + {{ return({'relations': [new_relation]}) }} +{%- endmaterialization -%} + +{%- materialization custom_adapter_test, adapter='postgres' -%} + {%- set new_relation = api.Relation.create(database=database, schema=schema, identifier=identifier) -%} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + {%- call statement('main') -%} + CREATE TABLE {{ new_relation }} AS ( + SELECT
'postgres_adapter'::text as adapter_type, * FROM ({{ sql }}) AS subquery + ) + {%- endcall -%} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + + {{ return({'relations': [new_relation]}) }} +{%- endmaterialization -%} + +{%- materialization custom_adapter_test, adapter='duckdb' -%} + {%- set new_relation = api.Relation.create(database=database, schema=schema, identifier=identifier) -%} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + {%- call statement('main') -%} + CREATE TABLE {{ new_relation }} AS ( + SELECT 'duckdb_adapter' as adapter_type, * FROM ({{ sql }}) AS subquery + ) + {%- endcall -%} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + + {{ return({'relations': [new_relation]}) }} +{%- endmaterialization -%} +""".strip() + + (macros_dir / "custom_adapter_test.sql").write_text(adapter_mat_content) + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='custom_adapter_test', +) }} + +SELECT + 1 as id, + 'test' as name +""".strip() + + (models_dir / "test_adapter_specific.sql").write_text(test_model_content) + + context = DbtContext(temp_project) + profile = Profile.load(context) + + helper = ManifestHelper( + temp_project, + temp_project, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + + materializations = helper.materializations() + assert "custom_adapter_test_default" in materializations + assert "custom_adapter_test_duckdb" in materializations + assert "custom_adapter_test_postgres" in materializations + + default_mat = materializations["custom_adapter_test_default"] + assert "default_adapter" in default_mat.definition + assert default_mat.adapter == "default" + + duckdb_mat = materializations["custom_adapter_test_duckdb"] + assert "duckdb_adapter" in duckdb_mat.definition + assert duckdb_mat.adapter == "duckdb" + + postgres_mat = materializations["custom_adapter_test_postgres"] + assert "postgres_adapter" in postgres_mat.definition + assert postgres_mat.adapter == "postgres" + + # verify that the correct adapter is selected based on target + context._target = profile.target + context._manifest = helper + models = helper.models() + + test_model = models["test_adapter_specific"] + + kind = test_model.model_kind(context) + assert isinstance(kind, DbtCustomKind) + assert kind.materialization == "custom_adapter_test" + # Should use duckdb adapter since that's the default target + assert "duckdb_adapter" in kind.definition or "default_adapter" in kind.definition + + # test also that adapter-specific materializations execute with correct adapter + sushi_context = Context(paths=path) + + plan = sushi_context.plan(select_models=["sushi.test_adapter_specific"]) + sushi_context.apply(plan) + + # check that the table was created with the correct adapter type + result = sushi_context.engine_adapter.fetchdf("SELECT * FROM sushi.test_adapter_specific") + assert len(result) == 1 + assert "adapter_type" in result.columns + assert result["adapter_type"][0] == "duckdb_adapter" + assert result["id"][0] == 1 + assert result["name"][0] == "test" + + +@pytest.mark.xdist_group("dbt_manifest") +def test_missing_custom_materialization_error(): + from sqlmesh.utils.errors import ConfigError + + project_path = Path("tests/fixtures/dbt/sushi_test") + context = DbtContext(project_path) + profile = Profile.load(context) + + # the materialization is non-existent + fake_model_config = ModelConfig( + name="test_model", + path=project_path / 
"models" / "fake_model.sql", + raw_code="SELECT 1 as id", + materialized="non_existent_custom", + schema="test_schema", + ) + + context._target = profile.target + helper = ManifestHelper( + project_path, + project_path, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + context._manifest = helper + + # Should raise ConfigError when trying to get the model kind + with pytest.raises(ConfigError) as e: + fake_model_config.model_kind(context) + + assert "Unknown materialization 'non_existent_custom'" in str(e.value) + assert "Custom materializations must be defined" in str(e.value) + + +@pytest.mark.xdist_group("dbt_manifest") +def test_broken_jinja_materialization_error(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + macros_dir = temp_project / "macros" / "materializations" + macros_dir.mkdir(parents=True, exist_ok=True) + + # Create broken Jinja materialization + broken_mat_content = """ +{%- materialization broken_jinja, default -%} + {%- set new_relation = api.Relation.create(database=database, schema=schema, identifier=identifier) -%} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + {# An intentional undefined variable that will cause runtime error #} + {%- set broken_var = undefined_variable_that_does_not_exist + 10 -%} + + {%- call statement('main') -%} + CREATE TABLE {{ new_relation }} AS ( + SELECT * FROM ({{ sql }}) AS subquery + WHERE 1 = {{ broken_var }} + ) + {%- endcall -%} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + + {{ return({'relations': [new_relation]}) }} +{%- endmaterialization -%} +""".strip() + + (macros_dir / "broken_jinja.sql").write_text(broken_mat_content) + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='broken_jinja', +) }} + +SELECT + 1 as id, + 'This should fail with Jinja error' as error_msg +""".strip() + + (models_dir / "test_broken_jinja.sql").write_text(test_model_content) + + sushi_context = Context(paths=path) + + # The model will load fine jinja won't fail at parse time + model = sushi_context.get_model("sushi.test_broken_jinja") + assert isinstance(model.kind, DbtCustomKind) + assert model.kind.materialization == "broken_jinja" + + # but execution should fail + with pytest.raises(Exception) as e: + plan = sushi_context.plan(select_models=["sushi.test_broken_jinja"]) + sushi_context.apply(plan) + + assert "plan application failed" in str(e.value).lower() + + +@pytest.mark.xdist_group("dbt_manifest") +def test_failing_hooks_in_materialization(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='custom_incremental', + pre_hook="CREATE TABLE will_fail_due_to_intentional_syntax_error (", + post_hook="DROP TABLE non_existent_table_that_will_fail", +) }} + +SELECT + 1 as id, + 'Testing hook failures' as test_msg +""".strip() + + (models_dir / "test_failing_hooks.sql").write_text(test_model_content) + + sushi_context = Context(paths=[str(temp_project)]) + + # in this case the pre_hook has invalid syntax + with pytest.raises(Exception) as e: + plan = sushi_context.plan(select_models=["sushi.test_failing_hooks"]) + sushi_context.apply(plan) + + assert "plan application failed" in str(e.value).lower() + + 
+@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_virtual_environments(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='custom_incremental', + time_column='created_at', +) }} + +SELECT + CURRENT_TIMESTAMP as created_at, + 1 as id, + 'venv_test' as test_type +""".strip() + + (models_dir / "test_venv_model.sql").write_text(test_model_content) + + sushi_context = Context(paths=path) + prod_plan = sushi_context.plan(select_models=["sushi.test_venv_model"]) + sushi_context.apply(prod_plan) + prod_result = sushi_context.engine_adapter.fetchdf( + "SELECT * FROM sushi.test_venv_model ORDER BY id" + ) + assert len(prod_result) == 1 + assert prod_result["id"][0] == 1 + assert prod_result["test_type"][0] == "venv_test" + + # Create dev environment and check the dev table was created with proper naming + dev_plan = sushi_context.plan("dev", select_models=["sushi.test_venv_model"]) + sushi_context.apply(dev_plan) + dev_result = sushi_context.engine_adapter.fetchdf( + "SELECT * FROM sushi__dev.test_venv_model ORDER BY id" + ) + assert len(dev_result) == 1 + assert dev_result["id"][0] == 1 + assert dev_result["test_type"][0] == "venv_test" + + dev_tables = sushi_context.engine_adapter.fetchdf(""" + SELECT table_name, table_schema + FROM system.information_schema.tables + WHERE table_schema LIKE 'sushi%dev%' + AND table_name LIKE '%test_venv_model%' + """) + + prod_tables = sushi_context.engine_adapter.fetchdf(""" + SELECT table_name, table_schema + FROM system.information_schema.tables + WHERE table_schema = 'sushi' + AND table_name LIKE '%test_venv_model%' + """) + + # Verify both environments have their own tables + assert len(dev_tables) >= 1 + assert len(prod_tables) >= 1 + + +@pytest.mark.xdist_group("dbt_manifest") +def test_virtual_environment_schema_names(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='custom_incremental', + time_column='created_at', +) }} + +SELECT + CURRENT_TIMESTAMP as created_at, + 1 as id, + 'schema_naming_test' as test_type +""".strip() + + (models_dir / "test_schema_naming.sql").write_text(test_model_content) + + context = Context(paths=path) + prod_plan = context.plan(select_models=["sushi.test_schema_naming"]) + context.apply(prod_plan) + + dev_plan = context.plan("dev", select_models=["sushi.test_schema_naming"]) + context.apply(dev_plan) + + prod_result = context.engine_adapter.fetchdf( + "SELECT * FROM sushi.test_schema_naming ORDER BY id" + ) + assert len(prod_result) == 1 + assert prod_result["test_type"][0] == "schema_naming_test" + + dev_result = context.engine_adapter.fetchdf( + "SELECT * FROM sushi__dev.test_schema_naming ORDER BY id" + ) + assert len(dev_result) == 1 + assert dev_result["test_type"][0] == "schema_naming_test" + + # to examine the schema structure + all_schemas_query = """ + SELECT DISTINCT table_schema, COUNT(*) as table_count + FROM system.information_schema.tables + WHERE table_schema LIKE '%sushi%' + AND table_name LIKE '%test_schema_naming%' + GROUP BY table_schema + ORDER BY table_schema + """ + + schema_info = context.engine_adapter.fetchdf(all_schemas_query) + + schema_names = 
schema_info["table_schema"].tolist() + + # - virtual schemas: sushi, sushi__dev (for views) + view_schemas = [s for s in schema_names if not s.startswith("sqlmesh__")] + + # - physical schema: sqlmesh__sushi (for actual data tables) + physical_schemas = [s for s in schema_names if s.startswith("sqlmesh__")] + + # verify we got both of them + assert len(view_schemas) >= 2 + assert len(physical_schemas) >= 1 + assert "sushi" in view_schemas + assert "sushi__dev" in view_schemas + assert any("sqlmesh__sushi" in s for s in physical_schemas) + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_lineage_tracking(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + # create a custom materialization model that depends on simple_model_a and waiter_names seed + lineage_model_content = """ +{{ config( + materialized='custom_incremental', + time_column='created_at', +) }} + +SELECT + CURRENT_TIMESTAMP as created_at, + w.id as waiter_id, + w.name as waiter_name, + s.a as simple_value, + w.id * s.a as computed_value, + 'lineage_test' as model_type +FROM {{ ref('waiter_names') }} w +CROSS JOIN {{ ref('simple_model_a') }} s +""".strip() + + (models_dir / "enhanced_waiter_data.sql").write_text(lineage_model_content) + + # Create another custom materialization model that depends on the first one and simple_model_b + downstream_model_content = """ +{{ config( + materialized='custom_incremental', + time_column='analysis_date', +) }} + +SELECT + CURRENT_TIMESTAMP as analysis_date, + e.waiter_name, + e.simple_value, + e.computed_value, + b.a as model_b_value, + e.computed_value + b.a as final_computation, + CASE + WHEN e.computed_value >= 5 THEN 'High' + WHEN e.computed_value >= 2 THEN 'Medium' + ELSE 'Low' + END as category, + 'downstream_lineage_test' as model_type +FROM {{ ref('enhanced_waiter_data') }} e +CROSS JOIN {{ ref('simple_model_b') }} b +WHERE e.computed_value >= 0 +""".strip() + + (models_dir / "waiter_analytics_summary.sql").write_text(downstream_model_content) + + context = Context(paths=path) + enhanced_data_model = context.get_model("sushi.enhanced_waiter_data") + analytics_summary_model = context.get_model("sushi.waiter_analytics_summary") + + # Verify that custom materialization models have proper model kinds + assert isinstance(enhanced_data_model.kind, DbtCustomKind) + assert enhanced_data_model.kind.materialization == "custom_incremental" + + assert isinstance(analytics_summary_model.kind, DbtCustomKind) + assert analytics_summary_model.kind.materialization == "custom_incremental" + + # - enhanced_waiter_data should depend on waiter_names and simple_model_a + enhanced_data_deps = enhanced_data_model.depends_on + assert '"memory"."sushi"."simple_model_a"' in enhanced_data_deps + assert '"memory"."sushi"."waiter_names"' in enhanced_data_deps + + # - waiter_analytics_summary should depend on enhanced_waiter_data and simple_model_b + analytics_deps = analytics_summary_model.depends_on + assert '"memory"."sushi"."enhanced_waiter_data"' in analytics_deps + assert '"memory"."sushi"."simple_model_b"' in analytics_deps + + # build only the models that have dependences + plan = context.plan( + select_models=[ + "sushi.waiter_names", + "sushi.simple_model_a", + "sushi.simple_model_b", + "sushi.enhanced_waiter_data", + "sushi.waiter_analytics_summary", + ] + ) + context.apply(plan) + + # Verify that all δοwnstream models 
were built and contain expected data + waiter_names_result = context.engine_adapter.fetchdf( + "SELECT COUNT(*) as count FROM sushi.waiter_names" + ) + assert waiter_names_result["count"][0] > 0 + + simple_a_result = context.engine_adapter.fetchdf("SELECT a FROM sushi.simple_model_a") + assert len(simple_a_result) > 0 + assert simple_a_result["a"][0] == 1 + + simple_b_result = context.engine_adapter.fetchdf("SELECT a FROM sushi.simple_model_b") + assert len(simple_b_result) > 0 + assert simple_b_result["a"][0] == 1 + + # Check intermediate custom materialization model + enhanced_data_result = context.engine_adapter.fetchdf(""" + SELECT + waiter_name, + simple_value, + computed_value, + model_type + FROM sushi.enhanced_waiter_data + ORDER BY waiter_id + LIMIT 5 + """) + + assert len(enhanced_data_result) > 0 + assert enhanced_data_result["model_type"][0] == "lineage_test" + assert all(val == 1 for val in enhanced_data_result["simple_value"]) + assert all(val >= 0 for val in enhanced_data_result["computed_value"]) + assert any(val == "Ryan" for val in enhanced_data_result["waiter_name"]) + + # Check final downstream custom materialization model + analytics_summary_result = context.engine_adapter.fetchdf(""" + SELECT + waiter_name, + category, + model_type, + final_computation + FROM sushi.waiter_analytics_summary + ORDER BY waiter_name + LIMIT 5 + """) + + assert len(analytics_summary_result) > 0 + assert analytics_summary_result["model_type"][0] == "downstream_lineage_test" + assert all(cat in ["High", "Medium", "Low"] for cat in analytics_summary_result["category"]) + assert all(val >= 0 for val in analytics_summary_result["final_computation"]) + + # Test that lineage information is preserved in dev environments + dev_plan = context.plan("dev", select_models=["sushi.waiter_analytics_summary"]) + context.apply(dev_plan) + + dev_analytics_result = context.engine_adapter.fetchdf(""" + SELECT + COUNT(*) as count, + COUNT(DISTINCT waiter_name) as unique_waiters + FROM sushi__dev.waiter_analytics_summary + """) + + prod_analytics_result = context.engine_adapter.fetchdf(""" + SELECT + COUNT(*) as count, + COUNT(DISTINCT waiter_name) as unique_waiters + FROM sushi.waiter_analytics_summary + """) + + # Dev and prod should have the same data as they share physical data + assert dev_analytics_result["count"][0] == prod_analytics_result["count"][0] + assert dev_analytics_result["unique_waiters"][0] == prod_analytics_result["unique_waiters"][0] diff --git a/tests/dbt/test_manifest.py b/tests/dbt/test_manifest.py index e6c02bcb4c..e2e7bc706c 100644 --- a/tests/dbt/test_manifest.py +++ b/tests/dbt/test_manifest.py @@ -232,7 +232,7 @@ def test_source_meta_external_location(): expected = ( "read_parquet('path/to/external/items.parquet')" if DBT_VERSION >= (1, 4, 0) - else '"main"."parquet_file".items' + else '"memory"."parquet_file".items' ) assert relation.render() == expected diff --git a/tests/dbt/test_model.py b/tests/dbt/test_model.py index d212872cb7..e29c6768bf 100644 --- a/tests/dbt/test_model.py +++ b/tests/dbt/test_model.py @@ -842,3 +842,14 @@ def test_jinja_config_no_query(create_empty_project): # loads without error and contains empty query (which will error at runtime) assert not context.snapshots['"local"."main"."comment_config_model"'].model.render_query() + + +@pytest.mark.slow +def test_load_custom_materialisations(sushi_test_dbt_context: Context) -> None: + context = sushi_test_dbt_context + assert context.get_model("sushi.custom_incremental_model") + assert 
context.get_model("sushi.custom_incremental_with_filter") + + context.load() + assert context.get_model("sushi.custom_incremental_model") + assert context.get_model("sushi.custom_incremental_with_filter") diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py index a640d620b7..9a9ce8f906 100644 --- a/tests/dbt/test_transformation.py +++ b/tests/dbt/test_transformation.py @@ -1,5 +1,5 @@ import agate -from datetime import datetime +from datetime import datetime, timedelta import json import logging import typing as t @@ -113,6 +113,129 @@ def test_materialization(): ModelConfig(name="model", alias="model", schema="schema", materialized="dictionary") +def test_dbt_custom_materialization(): + sushi_context = Context(paths=["tests/fixtures/dbt/sushi_test"]) + + plan_builder = sushi_context.plan_builder(select_models=["sushi.custom_incremental_model"]) + plan = plan_builder.build() + assert len(plan.selected_models) == 1 + selected_model = list(plan.selected_models)[0] + assert selected_model == "model.sushi.custom_incremental_model" + + qoery = "SELECT * FROM sushi.custom_incremental_model ORDER BY created_at" + hook_table = "SELECT * FROM hook_table ORDER BY id" + sushi_context.apply(plan) + result = sushi_context.engine_adapter.fetchdf(qoery) + assert len(result) == 1 + assert {"created_at", "id"}.issubset(result.columns) + + # assert the pre/post hooks executed as well as part of the custom materialization + hook_result = sushi_context.engine_adapter.fetchdf(hook_table) + assert len(hook_result) == 1 + assert {"length_col", "id", "updated_at"}.issubset(hook_result.columns) + assert int(hook_result["length_col"][0]) >= 519 + assert hook_result["id"][0] == 1 + + # running with execution time one day in the future to simulate an incremental insert + tomorrow = datetime.now() + timedelta(days=1) + sushi_context.run(select_models=["sushi.custom_incremental_model"], execution_time=tomorrow) + + result_after_run = sushi_context.engine_adapter.fetchdf(qoery) + assert {"created_at", "id"}.issubset(result_after_run.columns) + + # this should have added new unique values for the new row + assert len(result_after_run) == 2 + assert result_after_run["id"].is_unique + assert result_after_run["created_at"].is_unique + + # validate the hooks executed as part of the run as well + hook_result = sushi_context.engine_adapter.fetchdf(hook_table) + assert len(hook_result) == 2 + assert hook_result["id"][1] == 2 + assert int(hook_result["length_col"][1]) >= 519 + assert hook_result["id"].is_monotonic_increasing + assert hook_result["updated_at"].is_unique + assert not hook_result["length_col"].is_unique + + +def test_dbt_custom_materialization_with_time_filter_and_macro(): + sushi_context = Context(paths=["tests/fixtures/dbt/sushi_test"]) + today = datetime.now() + + # select both custom materialiasation models with the wildcard + selector = ["sushi.custom_incremental*"] + plan_builder = sushi_context.plan_builder(select_models=selector, execution_time=today) + plan = plan_builder.build() + + assert len(plan.selected_models) == 2 + assert { + "model.sushi.custom_incremental_model", + "model.sushi.custom_incremental_with_filter", + }.issubset(plan.selected_models) + + # the model that daily (default cron) populates with data + select_daily = "SELECT * FROM sushi.custom_incremental_model ORDER BY created_at" + + # this model uses `run_started_at` as a filter (which we populate with execution time) with 2 day interval + select_filter = "SELECT * FROM sushi.custom_incremental_with_filter 
ORDER BY created_at" + + sushi_context.apply(plan) + result = sushi_context.engine_adapter.fetchdf(select_daily) + assert len(result) == 1 + assert {"created_at", "id"}.issubset(result.columns) + + result = sushi_context.engine_adapter.fetchdf(select_filter) + assert len(result) == 1 + assert {"created_at", "id"}.issubset(result.columns) + + # - run ONE DAY LATER + a_day_later = today + timedelta(days=1) + sushi_context.run(select_models=selector, execution_time=a_day_later) + result_after_run = sushi_context.engine_adapter.fetchdf(select_daily) + + # the new row is inserted in the normal incremental model + assert len(result_after_run) == 2 + assert {"created_at", "id"}.issubset(result_after_run.columns) + assert result_after_run["id"].is_unique + assert result_after_run["created_at"].is_unique + + # this model due to the filter shouldn't populate with any new data + result_after_run_filter = sushi_context.engine_adapter.fetchdf(select_filter) + assert len(result_after_run_filter) == 1 + assert {"created_at", "id"}.issubset(result_after_run_filter.columns) + assert result.equals(result_after_run_filter) + assert result_after_run_filter["id"].is_unique + assert result_after_run_filter["created_at"][0].date() == today.date() + + # - run TWO DAYS LATER + two_days_later = a_day_later + timedelta(days=1) + sushi_context.run(select_models=selector, execution_time=two_days_later) + result_after_run = sushi_context.engine_adapter.fetchdf(select_daily) + + # again a new row is inserted in the normal model + assert len(result_after_run) == 3 + assert {"created_at", "id"}.issubset(result_after_run.columns) + assert result_after_run["id"].is_unique + assert result_after_run["created_at"].is_unique + + # the model with the filter now should populate as well + result_after_run_filter = sushi_context.engine_adapter.fetchdf(select_filter) + assert len(result_after_run_filter) == 2 + assert {"created_at", "id"}.issubset(result_after_run_filter.columns) + assert result_after_run_filter["id"].is_unique + assert result_after_run_filter["created_at"][0].date() == today.date() + assert result_after_run_filter["created_at"][1].date() == two_days_later.date() + + # assert hooks have executed for both plan and incremental runs + hook_result = sushi_context.engine_adapter.fetchdf("SELECT * FROM hook_table ORDER BY id") + assert len(hook_result) == 3 + hook_result["id"][0] == 1 + assert hook_result["id"].is_monotonic_increasing + assert hook_result["updated_at"].is_unique + assert int(hook_result["length_col"][1]) >= 519 + assert not hook_result["length_col"].is_unique + + def test_model_kind(): context = DbtContext() context.project_name = "Test" diff --git a/tests/fixtures/dbt/sushi_test/macros/materializations/custom_incremental.sql b/tests/fixtures/dbt/sushi_test/macros/materializations/custom_incremental.sql new file mode 100644 index 0000000000..c61899c8ff --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/macros/materializations/custom_incremental.sql @@ -0,0 +1,61 @@ +{%- macro build_incremental_filter_sql(sql, time_column, existing_relation, interval_config) -%} + {# macro to build the filter and also test use of macro inside materialisation #} + WITH source_data AS ( + {{ sql }} + ) + SELECT * FROM source_data + WHERE {{ time_column }} >= ( + SELECT COALESCE(MAX({{ time_column }}), '1900-01-01') + {%- if interval_config %} + INTERVAL {{ interval_config }} {%- endif %} + FROM {{ existing_relation }} + ) +{%- endmacro -%} + +{%- materialization custom_incremental, default -%} + {%- set existing_relation = 
adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} + {%- set new_relation = api.Relation.create(database=database, schema=schema, identifier=identifier) -%} + {%- set temp_relation = make_temp_relation(new_relation) -%} + + {%- set time_column = config.get('time_column') -%} + {%- set interval_config = config.get('interval') -%} + + {{ run_hooks(pre_hooks) }} + + {%- if existing_relation is none -%} + {# The first insert creates a new table if it doesn't exist #} + {%- call statement('main') -%} + CREATE TABLE {{ new_relation }} + AS {{ sql }} + {%- endcall -%} + {%- else -%} + {# Incremental load, appending new data with optional time filtering #} + {%- if time_column is not none -%} + {%- set filtered_sql -%} + {{ build_incremental_filter_sql(sql, time_column, existing_relation, interval_config) }} + {%- endset -%} + {%- else -%} + {%- set filtered_sql = sql -%} + {%- endif -%} + + {{log(filtered_sql, info=true)}} + + {%- call statement('create_temp') -%} + {{ create_table_as(True, temp_relation, filtered_sql) }} + CREATE TABLE {{ temp_relation }} + AS {{ filtered_sql }} + {%- endcall -%} + + {%- call statement('insert') -%} + INSERT INTO {{ new_relation }} + SELECT * FROM {{ temp_relation }} + {%- endcall -%} + + {%- call statement('drop_temp') -%} + DROP TABLE {{ temp_relation }} + {%- endcall -%} + {%- endif -%} + + {{ run_hooks(post_hooks) }} + + {{ return({'relations': [new_relation]}) }} +{%- endmaterialization -%} diff --git a/tests/fixtures/dbt/sushi_test/models/custom_incremental_model.sql b/tests/fixtures/dbt/sushi_test/models/custom_incremental_model.sql new file mode 100644 index 0000000000..c7e9a8f7ea --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/models/custom_incremental_model.sql @@ -0,0 +1,20 @@ +{{ config( + materialized='custom_incremental', + pre_hook=[ + "CREATE TABLE IF NOT EXISTS hook_table (id INTEGER, length_col TEXT, updated_at TIMESTAMP)" + ], + post_hook=[ + """ + INSERT INTO hook_table + SELECT + COALESCE(MAX(id), 0) + 1 AS id, + '{{ model.raw_code | length }}' AS length_col, + CURRENT_TIMESTAMP AS updated_at + FROM hook_table + """ + ] +) }} + +SELECT + current_timestamp as created_at, + hash(current_timestamp) as id, \ No newline at end of file diff --git a/tests/fixtures/dbt/sushi_test/models/custom_incremental_with_filter.sql b/tests/fixtures/dbt/sushi_test/models/custom_incremental_with_filter.sql new file mode 100644 index 0000000000..94cbdc9333 --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/models/custom_incremental_with_filter.sql @@ -0,0 +1,9 @@ +{{ config( + materialized='custom_incremental', + time_column='created_at', + interval='2 day' +) }} + +SELECT + CAST('{{ run_started_at }}' AS TIMESTAMP) as created_at, + hash('{{ run_started_at }}') as id, \ No newline at end of file diff --git a/tests/fixtures/dbt/sushi_test/profiles.yml b/tests/fixtures/dbt/sushi_test/profiles.yml index 056c3c2b91..f49ad8ea0f 100644 --- a/tests/fixtures/dbt/sushi_test/profiles.yml +++ b/tests/fixtures/dbt/sushi_test/profiles.yml @@ -3,6 +3,7 @@ sushi: in_memory: type: duckdb schema: sushi + database: memory duckdb: type: duckdb path: 'local.duckdb'
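For reference, with time_column='created_at' and interval='2 day' as configured in custom_incremental_with_filter above, the filter produced by build_incremental_filter_sql should render to roughly the following (this reads the `+` before INTERVAL in the macro as SQL addition, which matches the two-day lag exercised in the tests; existing_relation stands in for the resolved physical table):

    WITH source_data AS (
        SELECT
            CAST('{{ run_started_at }}' AS TIMESTAMP) as created_at,
            hash('{{ run_started_at }}') as id
    )
    SELECT * FROM source_data
    WHERE created_at >= (
        SELECT COALESCE(MAX(created_at), '1900-01-01') + INTERVAL 2 day
        FROM existing_relation
    )

In other words, an incremental run only appends rows stamped at least the configured interval beyond the newest existing row, which is why the one-day-later run below inserts nothing into the filtered model while the two-days-later run does.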
From c9b49b4e7ef96b77d83b042227acbb25743c34d9 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Wed, 1 Oct 2025 15:15:37 +0300 Subject: [PATCH 031/173] Chore!: bump sqlglot to v27.20.0 (#5460) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9b192d6a78..053b242813 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "requests", "rich[jupyter]", "ruamel.yaml", - "sqlglot[rs]~=27.19.0", + "sqlglot[rs]~=27.20.0", "tenacity", "time-machine", "json-stream" ] From a95955c24c3fa15327c2c26c93e8b81cf114c9ce Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Wed, 1 Oct 2025 07:49:35 -0700 Subject: [PATCH 032/173] fix: support unicode characters in file cache (#5463) --- sqlmesh/utils/__init__.py | 9 ++++++++- sqlmesh/utils/cache.py | 2 +- tests/utils/__init__.py | 23 +++++++++++++++++++++ tests/utils/test_cache.py | 1 + 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/sqlmesh/utils/__init__.py b/sqlmesh/utils/__init__.py index c220de4847..5b1b077216 100644 --- a/sqlmesh/utils/__init__.py +++ b/sqlmesh/utils/__init__.py @@ -21,6 +21,7 @@ from functools import lru_cache, reduce, wraps from pathlib import Path +import unicodedata from sqlglot import exp from sqlglot.dialects.dialect import Dialects @@ -291,8 +292,14 @@ def sqlglot_dialects() -> str: NON_ALNUM = re.compile(r"[^a-zA-Z0-9_]") +NON_ALNUM_INCLUDE_UNICODE = re.compile(r"\W", flags=re.UNICODE) -def sanitize_name(name: str) -> str: + + +def sanitize_name(name: str, *, include_unicode: bool = False) -> str: + if include_unicode: + s = unicodedata.normalize("NFC", name) + s = NON_ALNUM_INCLUDE_UNICODE.sub("_", s) + return s return NON_ALNUM.sub("_", name) diff --git a/sqlmesh/utils/cache.py b/sqlmesh/utils/cache.py index 002248f511..4b557e43b6 100644 --- a/sqlmesh/utils/cache.py +++ b/sqlmesh/utils/cache.py @@ -133,7 +133,7 @@ def clear(self) -> None: def _cache_entry_path(self, name: str, entry_id: str = "") -> Path: entry_file_name = "__".join(p for p in (self._cache_version, name, entry_id) if p) - full_path = self._path / sanitize_name(entry_file_name) + full_path = self._path / sanitize_name(entry_file_name, include_unicode=True) if IS_WINDOWS: # handle paths longer than 260 chars full_path = fix_windows_path(full_path) diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py index e69de29bb2..744ad37757 100644 --- a/tests/utils/__init__.py +++ b/tests/utils/__init__.py @@ -0,0 +1,23 @@ +import pytest + +from sqlmesh.utils import sanitize_name + + +@pytest.mark.parametrize( + "raw,exclude_unicode,include_unicode", + [ + ("simple", "simple", "simple"), + ("snake_case", "snake_case", "snake_case"), + ("客户数据", "____", "客户数据"), + ("客户-数据 v2", "______v2", "客户_数据_v2"), + ("中文,逗号", "_____", "中文_逗号"), + ("a/b", "a_b", "a_b"), + ("spaces\tand\nnewlines", "spaces_and_newlines", "spaces_and_newlines"), + ("data📦2025", "data_2025", "data_2025"), + ("MiXeD123_名字", "MiXeD123___", "MiXeD123_名字"), + ("", "", ""), + ], +) +def test_sanitize_name(raw, exclude_unicode, include_unicode): + assert sanitize_name(raw) == exclude_unicode + assert sanitize_name(raw, include_unicode=True) == include_unicode diff --git a/tests/utils/test_cache.py b/tests/utils/test_cache.py index cd1fdb0115..0b6d335446 100644 --- a/tests/utils/test_cache.py +++ b/tests/utils/test_cache.py @@ -39,6 +39,7 @@ def test_file_cache(tmp_path: Path, mocker: MockerFixture): loader.assert_called_once() assert "___test_model_" in cache._cache_entry_path('"test_model"').name + assert "客户数据" in cache._cache_entry_path("客户数据").name def test_optimized_query_cache(tmp_path: Path, mocker: MockerFixture):
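A minimal sketch of why the NFC normalization step above matters, assuming only the stdlib and the same sanitize_name import used in the tests: the same visible name can arrive in composed or decomposed Unicode form, and without normalization the two forms would map to different cache entry paths.

    # hypothetical illustration, not part of the patch
    from sqlmesh.utils import sanitize_name

    composed = "café"          # 'é' as one precomposed code point (U+00E9)
    decomposed = "cafe\u0301"  # 'e' followed by a combining acute accent (U+0301)
    assert composed != decomposed  # distinct code point sequences, same glyphs
    # NFC normalization collapses both forms to a single cache-safe entry name
    assert sanitize_name(composed, include_unicode=True) == sanitize_name(
        decomposed, include_unicode=True
    )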
From 956670c14c04037b7d0322de918c994697a8de14 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Thu, 2 Oct 2025 16:24:50 +1300 Subject: [PATCH 033/173] Feat(sqlmesh_dbt): Implement --model and --resource-type (#5443) --- sqlmesh/core/selector.py | 81 ++++++++++++++++--- sqlmesh_dbt/cli.py | 32 +++++++- sqlmesh_dbt/operations.py | 26 ++++-- sqlmesh_dbt/selectors.py | 40 ++++++++- tests/core/test_selector_dbt.py | 63 +++++++++++++++ ...st_selector.py => test_selector_native.py} | 0 tests/dbt/cli/test_list.py | 23 ++++++ tests/dbt/cli/test_selectors.py | 58 +++++++++++++ 8 files changed, 306 insertions(+), 17 deletions(-) create mode 100644 tests/core/test_selector_dbt.py rename tests/core/{test_selector.py => test_selector_native.py} (100%) diff --git a/sqlmesh/core/selector.py b/sqlmesh/core/selector.py index 1484d06cee..3865327acd 100644 --- a/sqlmesh/core/selector.py +++ b/sqlmesh/core/selector.py @@ -16,6 +16,7 @@ from sqlmesh.core.dialect import normalize_model_name from sqlmesh.core.environment import Environment from sqlmesh.core.model import update_model_schemas +from sqlmesh.core.audit import StandaloneAudit from sqlmesh.utils import UniqueKeyDict from sqlmesh.utils.dag import DAG from sqlmesh.utils.git import GitClient @@ -25,6 +26,7 @@ if t.TYPE_CHECKING: from typing_extensions import Literal as Lit # noqa from sqlmesh.core.model import Model + from sqlmesh.core.node import Node from sqlmesh.core.state_sync import StateReader @@ -167,7 +169,7 @@ def get_model(fqn: str) -> t.Optional[Model]: return models def expand_model_selections( - self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str, Model]] = None + self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str, Node]] = None ) -> t.Set[str]: """Expands a set of model selections into a set of model fqns that can be looked up in the Context. @@ -180,7 +182,7 @@ def expand_model_selections( node = parse(" | ".join(f"({s})" for s in model_selections)) - all_models = models or self._models + all_models: t.Dict[str, Node] = models or dict(self._models) models_by_tags: t.Dict[str, t.Set[str]] = {} for fqn, model in all_models.items(): @@ -226,6 +228,13 @@ def evaluate(node: exp.Expression) -> t.Set[str]: if fnmatch.fnmatchcase(tag, pattern) } return models_by_tags.get(pattern, set()) + if isinstance(node, ResourceType): + resource_type = node.name.lower() + return { + fqn + for fqn, model in all_models.items() + if self._matches_resource_type(resource_type, model) + } if isinstance(node, Direction): selected = set() @@ -243,36 +252,49 @@ def evaluate(node: exp.Expression) -> t.Set[str]: return evaluate(node) @abc.abstractmethod - def _model_name(self, model: Model) -> str: + def _model_name(self, model: Node) -> str: """Given a model, return the name that a selector pattern containing wildcards should be fnmatch'd on""" pass @abc.abstractmethod - def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Model]) -> t.Set[str]: + def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]: """Given a pattern, return the keys of the matching models from :all_models""" pass + @abc.abstractmethod + def _matches_resource_type(self, resource_type: str, model: Node) -> bool: + """Indicate whether or not the supplied model matches the supplied resource type""" + pass + class NativeSelector(Selector): """Implementation of selectors that matches objects based on SQLMesh native names""" - def _model_name(self, model: Model) -> str: + def _model_name(self, model: Node) -> str: return model.name - def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Model]) -> t.Set[str]: + def _pattern_to_model_fqns(self, 
pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]: fqn = normalize_model_name(pattern, self._default_catalog, self._dialect) return {fqn} if fqn in all_models else set() + def _matches_resource_type(self, resource_type: str, model: Node) -> bool: + if resource_type == "model": + return model.is_model + if resource_type == "audit": + return isinstance(model, StandaloneAudit) + + raise SQLMeshError(f"Unsupported resource type: {resource_type}") + class DbtSelector(Selector): """Implementation of selectors that matches objects based on the DBT names instead of the SQLMesh native names""" - def _model_name(self, model: Model) -> str: + def _model_name(self, model: Node) -> str: if dbt_fqn := model.dbt_fqn: return dbt_fqn raise SQLMeshError("dbt node information must be populated to use dbt selectors") - def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Model]) -> t.Set[str]: + def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]: # a pattern like "staging.customers" should match a model called "jaffle_shop.staging.customers" # but not a model called "jaffle_shop.customers.staging" # also a pattern like "aging" should not match "staging" so we need to consider components; not substrings @@ -306,6 +328,40 @@ def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Model]) - matches.add(fqn) return matches + def _matches_resource_type(self, resource_type: str, model: Node) -> bool: + """ + ref: https://docs.getdbt.com/reference/node-selection/methods#resource_type + + # supported by SQLMesh + "model" + "seed" + "source" # external model + "test" # standalone audit + + # not supported by SQLMesh yet, commented out to throw an error if someone tries to use them + "analysis" + "exposure" + "metric" + "saved_query" + "semantic_model" + "snapshot" + "unit_test" + """ + if resource_type not in ("model", "seed", "source", "test"): + raise SQLMeshError(f"Unsupported resource type: {resource_type}") + + if isinstance(model, StandaloneAudit): + return resource_type == "test" + + if resource_type == "model": + return model.is_model and not model.kind.is_external and not model.kind.is_seed + if resource_type == "source": + return model.kind.is_external + if resource_type == "seed": + return model.kind.is_seed + + return False + class SelectorDialect(Dialect): IDENTIFIERS_CAN_START_WITH_DIGIT = True @@ -336,6 +392,10 @@ class Tag(exp.Expression): pass +class ResourceType(exp.Expression): + pass + + class Direction(exp.Expression): pass @@ -388,7 +448,8 @@ def _parse_var() -> exp.Expression: upstream = _match(TokenType.PLUS) downstream = None tag = _parse_kind("tag") - git = False if tag else _parse_kind("git") + resource_type = False if tag else _parse_kind("resource_type") + git = False if resource_type else _parse_kind("git") lstar = "*" if _match(TokenType.STAR) else "" directions = {} @@ -414,6 +475,8 @@ def _parse_var() -> exp.Expression: if tag: this = Tag(this=this) + if resource_type: + this = ResourceType(this=this) if git: this = Git(this=this) if directions: diff --git a/sqlmesh_dbt/cli.py b/sqlmesh_dbt/cli.py index fa75d303a1..83230de3fd 100644 --- a/sqlmesh_dbt/cli.py +++ b/sqlmesh_dbt/cli.py @@ -33,15 +33,39 @@ def _cleanup() -> None: select_option = click.option( "-s", - "-m", "--select", + multiple=True, + help="Specify the nodes to include.", +) +model_option = click.option( + "-m", "--models", "--model", multiple=True, - help="Specify the nodes to include.", + help="Specify the model nodes to include; other 
nodes are excluded.", ) exclude_option = click.option("--exclude", multiple=True, help="Specify the nodes to exclude.") +# TODO: expand this out into --resource-type/--resource-types and --exclude-resource-type/--exclude-resource-types +resource_types = [ + "metric", + "semantic_model", + "saved_query", + "source", + "analysis", + "model", + "test", + "unit_test", + "exposure", + "snapshot", + "seed", + "default", + "all", +] +resource_type_option = click.option( + "--resource-type", type=click.Choice(resource_types, case_sensitive=False) +) + @click.group(cls=ErrorHandlingGroup, invoke_without_command=True) @click.option("--profile", help="Which existing profile to load. Overrides output.profile") @@ -86,7 +110,9 @@ def dbt( @dbt.command() @select_option +@model_option @exclude_option +@resource_type_option @click.option( "-f", "--full-refresh", @@ -116,7 +142,9 @@ def run( @dbt.command(name="list") @select_option +@model_option @exclude_option +@resource_type_option @vars_option @click.pass_context def list_(ctx: click.Context, vars: t.Optional[t.Dict[str, t.Any]], **kwargs: t.Any) -> None: diff --git a/sqlmesh_dbt/operations.py b/sqlmesh_dbt/operations.py index a157705ffd..6e8b452b28 100644 --- a/sqlmesh_dbt/operations.py +++ b/sqlmesh_dbt/operations.py @@ -26,12 +26,16 @@ def list_( self, select: t.Optional[t.List[str]] = None, exclude: t.Optional[t.List[str]] = None, + models: t.Optional[t.List[str]] = None, + resource_type: t.Optional[str] = None, ) -> None: # dbt list prints: # - models # - "data tests" (audits) for those models # it also applies selectors which is useful for testing selectors - selected_models = list(self._selected_models(select, exclude).values()) + selected_models = list( + self._selected_models(select, exclude, models, resource_type).values() + ) self.console.list_models( selected_models, {k: v.node for k, v in self.context.snapshots.items()} ) @@ -41,13 +45,19 @@ def run( environment: t.Optional[str] = None, select: t.Optional[t.List[str]] = None, exclude: t.Optional[t.List[str]] = None, + models: t.Optional[t.List[str]] = None, + resource_type: t.Optional[str] = None, full_refresh: bool = False, empty: bool = False, ) -> Plan: + consolidated_select, consolidated_exclude = selectors.consolidate( + select or [], exclude or [], models or [], resource_type + ) + plan_builder = self._plan_builder( environment=environment, - select=select, - exclude=exclude, + select=consolidated_select, + exclude=consolidated_exclude, full_refresh=full_refresh, empty=empty, ) @@ -86,9 +96,15 @@ def _plan_builder( ) def _selected_models( - self, select: t.Optional[t.List[str]] = None, exclude: t.Optional[t.List[str]] = None + self, + select: t.Optional[t.List[str]] = None, + exclude: t.Optional[t.List[str]] = None, + models: t.Optional[t.List[str]] = None, + resource_type: t.Optional[str] = None, ) -> t.Dict[str, Model]: - if sqlmesh_selector := selectors.to_sqlmesh(select or [], exclude or []): + if sqlmesh_selector := selectors.to_sqlmesh( + *selectors.consolidate(select or [], exclude or [], models or [], resource_type) + ): if self.debug: self.console.print(f"dbt --select: {select}") self.console.print(f"dbt --exclude: {exclude}") diff --git a/sqlmesh_dbt/selectors.py b/sqlmesh_dbt/selectors.py index 120d5dcb36..5821586ad3 100644 --- a/sqlmesh_dbt/selectors.py +++ b/sqlmesh_dbt/selectors.py @@ -4,7 +4,45 @@ logger = logging.getLogger(__name__) -def to_sqlmesh(dbt_select: t.Collection[str], dbt_exclude: t.Collection[str]) -> t.Optional[str]: +def consolidate( + select: 
t.List[str], + exclude: t.List[str], + models: t.List[str], + resource_type: t.Optional[str], +) -> t.Tuple[t.List[str], t.List[str]]: + """ + Given a bunch of dbt CLI arguments that may or may not be defined: + --select, --exclude, --models, --resource-type + + Combine them into a single set of --select/--exclude node selectors, throwing an error if mutually exclusive combinations are provided + Note that the returned value is still in dbt format, pass it to to_sqlmesh() to create a selector for the sqlmesh selector engine + """ + if models and select: + raise ValueError('"models" and "select" are mutually exclusive arguments') + + if models and resource_type: + raise ValueError('"models" and "resource_type" are mutually exclusive arguments') + + if models: + # --models implies resource_type:model + resource_type = "model" + + if resource_type: + resource_type_selector = f"resource_type:{resource_type}" + all_selectors = [*select, *models] + select = ( + [ + f"resource_type:{resource_type},{original_selector}" + for original_selector in all_selectors + ] + if all_selectors + else [resource_type_selector] + ) + + return select, exclude + + +def to_sqlmesh(dbt_select: t.List[str], dbt_exclude: t.List[str]) -> t.Optional[str]: """ Given selectors defined in the format of the dbt cli --select and --exclude arguments, convert them into a selector expression that the SQLMesh selector engine can understand. diff --git a/tests/core/test_selector_dbt.py b/tests/core/test_selector_dbt.py new file mode 100644 index 0000000000..112c5740ac --- /dev/null +++ b/tests/core/test_selector_dbt.py @@ -0,0 +1,63 @@ +import typing as t +import pytest +from pytest_mock import MockerFixture +from sqlglot import exp +from sqlmesh.core.model.kind import SeedKind, ExternalKind, FullKind +from sqlmesh.core.model.seed import Seed +from sqlmesh.core.model.definition import SqlModel, SeedModel, ExternalModel +from sqlmesh.core.audit.definition import StandaloneAudit +from sqlmesh.core.snapshot.definition import Node +from sqlmesh.core.selector import DbtSelector +from sqlmesh.core.selector import parse, ResourceType +from sqlmesh.utils.errors import SQLMeshError +import sqlmesh.core.dialect as d +from sqlmesh.utils import UniqueKeyDict + + +def test_parse_resource_type(): + assert parse("resource_type:foo") == ResourceType(this=exp.Var(this="foo")) + + +@pytest.mark.parametrize( + "resource_type,expected", + [ + ("model", {'"test"."normal_model"'}), + ("seed", {'"test"."seed_model"'}), + ("test", {'"test"."standalone_audit"'}), + ("source", {'"external"."model"'}), + ], +) +def test_expand_model_selections_resource_type( + mocker: MockerFixture, resource_type: str, expected: t.Set[str] +): + models: t.Dict[str, Node] = { + '"test"."normal_model"': SqlModel( + name="test.normal_model", + kind=FullKind(), + query=d.parse_one("SELECT 'normal_model' AS what"), + ), + '"test"."seed_model"': SeedModel( + name="test.seed_model", kind=SeedKind(path="/tmp/foo"), seed=Seed(content="id,name") + ), + '"test"."standalone_audit"': StandaloneAudit( + name="test.standalone_audit", query=d.parse_one("SELECT 'standalone_audit' AS what") + ), + '"external"."model"': ExternalModel(name="external.model", kind=ExternalKind()), + } + + selector = DbtSelector(state_reader=mocker.Mock(), models=UniqueKeyDict("models")) + + assert selector.expand_model_selections([f"resource_type:{resource_type}"], models) == expected + + +def test_unsupported_resource_type(mocker: MockerFixture): + selector = DbtSelector(state_reader=mocker.Mock(), 
models=UniqueKeyDict("models")) + + models: t.Dict[str, Node] = { + '"test"."normal_model"': SqlModel( + name="test.normal_model", query=d.parse_one("SELECT 'normal_model' AS what") + ), + } + + with pytest.raises(SQLMeshError, match="Unsupported"): + selector.expand_model_selections(["resource_type:analysis"], models) diff --git a/tests/core/test_selector.py b/tests/core/test_selector_native.py similarity index 100% rename from tests/core/test_selector.py rename to tests/core/test_selector_native.py diff --git a/tests/dbt/cli/test_list.py b/tests/dbt/cli/test_list.py index 712d80b2fe..3e6a55125c 100644 --- a/tests/dbt/cli/test_list.py +++ b/tests/dbt/cli/test_list.py @@ -79,3 +79,26 @@ def test_list_with_vars(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Re │ └── depends_on: jaffle_shop.customers""" in result.output ) + + +def test_list_models_mutually_exclusive( + jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result] +): + result = invoke_cli(["list", "--select", "foo", "--models", "bar"]) + assert result.exit_code != 0 + assert '"models" and "select" are mutually exclusive arguments' in result.output + + result = invoke_cli(["list", "--resource-type", "test", "--models", "bar"]) + assert result.exit_code != 0 + assert '"models" and "resource_type" are mutually exclusive arguments' in result.output + + +def test_list_models(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): + result = invoke_cli(["list", "--models", "jaffle_shop"]) + assert result.exit_code == 0 + assert not result.exception + + assert "─ jaffle_shop.customers" in result.output + assert ( + "─ jaffle_shop.raw_customers" not in result.output + ) # should be excluded because dbt --models excludes seeds diff --git a/tests/dbt/cli/test_selectors.py b/tests/dbt/cli/test_selectors.py index 99907bda84..3d50fe6ed2 100644 --- a/tests/dbt/cli/test_selectors.py +++ b/tests/dbt/cli/test_selectors.py @@ -269,3 +269,61 @@ def test_selection_and_exclusion_by_dbt_names( assert sqlmesh_selector assert selector.expand_model_selections([sqlmesh_selector]) == expected + + +@pytest.mark.parametrize( + "input_args,expected", + [ + ( + dict(select=["jaffle_shop"], models=["jaffle_shop"]), + '"models" and "select" are mutually exclusive', + ), + ( + dict(models=["jaffle_shop"], resource_type="test"), + '"models" and "resource_type" are mutually exclusive', + ), + ( + dict(select=["jaffle_shop"], resource_type="test"), + (["resource_type:test,jaffle_shop"], []), + ), + (dict(resource_type="model"), (["resource_type:model"], [])), + (dict(models=["stg_customers"]), (["resource_type:model,stg_customers"], [])), + ( + dict(models=["stg_customers"], exclude=["orders"]), + (["resource_type:model,stg_customers"], ["orders"]), + ), + ], +) +def test_consolidate(input_args: t.Dict[str, t.Any], expected: t.Union[t.Tuple[str, str], str]): + all_input_args: t.Dict[str, t.Any] = dict(select=[], exclude=[], models=[], resource_type=None) + + all_input_args.update(input_args) + + def _do_assert(): + assert selectors.consolidate(**all_input_args) == expected + + if isinstance(expected, str): + with pytest.raises(ValueError, match=expected): + _do_assert() + else: + _do_assert() + + +def test_models_by_dbt_names(jaffle_shop_duckdb_context: Context): + ctx = jaffle_shop_duckdb_context + + selector = ctx._new_selector() + assert isinstance(selector, DbtSelector) + + selector_expr = selectors.to_sqlmesh( + *selectors.consolidate(select=[], exclude=[], models=["jaffle_shop"], resource_type=None) + ) + assert selector_expr + + assert 
selector.expand_model_selections([selector_expr]) == { + '"jaffle_shop"."main"."customers"', + '"jaffle_shop"."main"."orders"', + '"jaffle_shop"."main"."stg_customers"', + '"jaffle_shop"."main"."stg_orders"', + '"jaffle_shop"."main"."stg_payments"', + }
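A minimal sketch of the flag consolidation exercised above, assuming selectors is imported from sqlmesh_dbt as in these tests: --models implies resource_type:model, so each model selector is intersected with that resource type before the combined dbt-style lists are handed to to_sqlmesh() for the SQLMesh selector engine.

    # hypothetical illustration, mirroring the parametrized cases above
    from sqlmesh_dbt import selectors

    select, exclude = selectors.consolidate(
        select=[], exclude=["orders"], models=["stg_customers"], resource_type=None
    )
    assert (select, exclude) == (["resource_type:model,stg_customers"], ["orders"])

    # the consolidated dbt lists are then translated into one SQLMesh selector expression
    expr = selectors.to_sqlmesh(select, exclude)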
-type d -name ".cache" -exec rm -rf {} + 2>/dev/null && echo "Successfully removed all .cache directories" dev-publish: ui-build clean-build publish From c67a2fd05a39729aad68e0344451648e48bf6e93 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Thu, 2 Oct 2025 17:07:17 +0300 Subject: [PATCH 037/173] fix: use bitnami legacy for spark (#5470) --- tests/core/engine_adapter/integration/docker/spark/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core/engine_adapter/integration/docker/spark/Dockerfile b/tests/core/engine_adapter/integration/docker/spark/Dockerfile index 7fb39b840c..cfbe7d1e88 100644 --- a/tests/core/engine_adapter/integration/docker/spark/Dockerfile +++ b/tests/core/engine_adapter/integration/docker/spark/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/bitnami/spark:3.5 +FROM bitnamilegacy/spark:3.5.2 USER root RUN install_packages curl USER 1001 From 22e37d25d88855b583c3e74192326b52b282a6a7 Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Thu, 2 Oct 2025 08:16:28 -0700 Subject: [PATCH 038/173] fix: unicode in model name databricks (#5465) --- sqlmesh/core/engine_adapter/databricks.py | 2 + .../integration/test_integration.py | 37 +++++++++++++++++++ tests/core/engine_adapter/test_databricks.py | 2 +- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/sqlmesh/core/engine_adapter/databricks.py b/sqlmesh/core/engine_adapter/databricks.py index 946a7bdf74..173e1b08af 100644 --- a/sqlmesh/core/engine_adapter/databricks.py +++ b/sqlmesh/core/engine_adapter/databricks.py @@ -34,6 +34,8 @@ class DatabricksEngineAdapter(SparkEngineAdapter): SUPPORTS_CLONING = True SUPPORTS_MATERIALIZED_VIEWS = True SUPPORTS_MATERIALIZED_VIEW_SCHEMA = True + # Spark has this set to false for compatibility when mixing with Trino but that isn't a concern with Databricks + QUOTE_IDENTIFIERS_IN_VIEWS = True SCHEMA_DIFFER_KWARGS = { "support_positional_add": True, "nested_support": NestedSupport.ALL, diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index 5190d26e98..995875c778 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -3990,3 +3990,40 @@ def _set_config(gateway: str, config: Config) -> None: was_evaluated=True, day_delta=4, ) + + +def test_unicode_characters(ctx: TestContext, tmp_path: Path): + # Engines that don't quote identifiers in views are incompatible with unicode characters in model names + # at the time of writing this is Spark/Trino and they do this for compatibility reasons. + # I also think Spark may not support unicode in general but that would need to be verified. 
+ if not ctx.engine_adapter.QUOTE_IDENTIFIERS_IN_VIEWS: + pytest.skip("Skipping as these engines have issues with unicode characters in model names") + + model_name = "客户数据" + table = ctx.table(model_name).sql(dialect=ctx.dialect) + (tmp_path / "models").mkdir(exist_ok=True) + + model_def = f""" + MODEL ( + name {table}, + kind FULL, + dialect '{ctx.dialect}' + ); + SELECT 1 as id + """ + + (tmp_path / "models" / "客户数据.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path) + context.plan(auto_apply=True, no_prompts=True) + + results = ctx.get_metadata_results() + assert len(results.views) == 1 + assert results.views[0].lower() == model_name + + schema = d.to_schema(ctx.schema(), dialect=ctx.dialect) + schema_name = schema.args["db"].this + schema.args["db"].set("this", "sqlmesh__" + schema_name) + table_results = ctx.get_metadata_results(schema) + assert len(table_results.tables) == 1 + assert table_results.tables[0].lower().startswith(schema_name.lower() + "________") diff --git a/tests/core/engine_adapter/test_databricks.py b/tests/core/engine_adapter/test_databricks.py index f482361c3c..27988fed39 100644 --- a/tests/core/engine_adapter/test_databricks.py +++ b/tests/core/engine_adapter/test_databricks.py @@ -195,7 +195,7 @@ def test_materialized_view_properties(mocker: MockFixture, make_mocked_engine_ad sql_calls = to_sql_calls(adapter) # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-materialized-view.html#syntax assert sql_calls == [ - "CREATE OR REPLACE MATERIALIZED VIEW test_table PARTITIONED BY (ds) AS SELECT 1", + "CREATE OR REPLACE MATERIALIZED VIEW `test_table` PARTITIONED BY (`ds`) AS SELECT 1", ] From 8342c37cfc72e5e791bf60fc14ce877420ef4adb Mon Sep 17 00:00:00 2001 From: Max Mykal Date: Thu, 2 Oct 2025 11:05:33 -0700 Subject: [PATCH 039/173] feat(web_common): add components for lineage (#5385) --- pnpm-lock.yaml | 268 +- web/common/.storybook/main.ts | 2 +- web/common/.syncpackrc | 4 +- web/common/package-lock.json | 7183 ----------------- web/common/package.json | 133 +- .../src/components/CopyButton/CopyButton.tsx | 1 + web/common/src/components/Input/Input.css | 7 + web/common/src/components/Input/Input.tsx | 6 +- web/common/src/components/Lineage/Lineage.css | 3 + .../ColumnLevelLineageContext.ts | 101 + .../LineageColumnLevel/FactoryColumn.tsx | 257 + .../Lineage/LineageColumnLevel/help.ts | 233 + .../useColumnLevelLineage.ts | 49 + .../Lineage/LineageColumnLevel/useColumns.tsx | 58 + .../src/components/Lineage/LineageContext.ts | 103 + .../Lineage/LineageControlButton.tsx | 43 + .../components/Lineage/LineageControlIcon.tsx | 42 + .../src/components/Lineage/LineageLayout.tsx | 401 + .../Lineage/edge/EdgeWithGradient.tsx | 114 + .../Lineage/edge/FactoryEdgeWithGradient.tsx | 58 + .../src/components/Lineage/help.test.ts | 768 ++ web/common/src/components/Lineage/help.ts | 270 + web/common/src/components/Lineage/index.ts | 28 + .../components/Lineage/layout/dagreLayout.ts | 90 + .../src/components/Lineage/layout/help.ts | 100 + .../components/Lineage/node/NodeAppendix.tsx | 44 + .../src/components/Lineage/node/NodeBadge.tsx | 23 + .../src/components/Lineage/node/NodeBase.tsx | 31 + .../components/Lineage/node/NodeContainer.tsx | 21 + .../components/Lineage/node/NodeDetail.tsx | 26 + .../components/Lineage/node/NodeDivider.tsx | 3 + .../components/Lineage/node/NodeHandle.tsx | 31 + .../Lineage/node/NodeHandleIcon.tsx | 22 + .../components/Lineage/node/NodeHandles.tsx | 50 + .../components/Lineage/node/NodeHeader.tsx | 28 + 
.../src/components/Lineage/node/NodePort.tsx | 64 + .../src/components/Lineage/node/NodePorts.tsx | 44 + .../components/Lineage/node/base-handle.tsx | 27 + .../src/components/Lineage/node/base-node.tsx | 17 + .../Lineage/node/useNodeMetadata.tsx | 43 + .../Lineage/stories/Lineage.stories.tsx | 192 + .../Lineage/stories/ModelLineage.tsx | 416 + .../Lineage/stories/ModelLineageContext.ts | 97 + .../components/Lineage/stories/ModelNode.tsx | 331 + .../Lineage/stories/ModelNodeColumn.tsx | 76 + .../Lineage/stories/dagreLayout.worker.ts | 24 + .../src/components/Lineage/stories/help.ts | 29 + web/common/src/components/Lineage/utils.ts | 108 + .../MessageContainer/MessageContainer.css | 3 + .../MessageContainer/MessageContainer.tsx | 4 +- .../src/components/Metadata/Metadata.css | 4 + .../src/components/ModelName/ModelName.tsx | 8 +- .../src/components/Typography/Information.tsx | 2 +- .../components/VirtualList/FilterableList.css | 9 + .../components/VirtualList/FilterableList.tsx | 7 +- .../components/VirtualList/VirtualList.tsx | 12 +- .../src/styles/design/semantic-colors.css | 10 - web/common/tailwind.base.config.js | 32 +- web/common/tailwind.config.js | 7 +- web/common/tailwind.lineage.config.js | 95 + web/common/tsconfig.base.json | 2 +- web/common/tsconfig.build.json | 3 +- web/common/vite.config.js | 18 +- 63 files changed, 4902 insertions(+), 7383 deletions(-) delete mode 100644 web/common/package-lock.json create mode 100644 web/common/src/components/Input/Input.css create mode 100644 web/common/src/components/Lineage/Lineage.css create mode 100644 web/common/src/components/Lineage/LineageColumnLevel/ColumnLevelLineageContext.ts create mode 100644 web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx create mode 100644 web/common/src/components/Lineage/LineageColumnLevel/help.ts create mode 100644 web/common/src/components/Lineage/LineageColumnLevel/useColumnLevelLineage.ts create mode 100644 web/common/src/components/Lineage/LineageColumnLevel/useColumns.tsx create mode 100644 web/common/src/components/Lineage/LineageContext.ts create mode 100644 web/common/src/components/Lineage/LineageControlButton.tsx create mode 100644 web/common/src/components/Lineage/LineageControlIcon.tsx create mode 100644 web/common/src/components/Lineage/LineageLayout.tsx create mode 100644 web/common/src/components/Lineage/edge/EdgeWithGradient.tsx create mode 100644 web/common/src/components/Lineage/edge/FactoryEdgeWithGradient.tsx create mode 100644 web/common/src/components/Lineage/help.test.ts create mode 100644 web/common/src/components/Lineage/help.ts create mode 100644 web/common/src/components/Lineage/index.ts create mode 100644 web/common/src/components/Lineage/layout/dagreLayout.ts create mode 100644 web/common/src/components/Lineage/layout/help.ts create mode 100644 web/common/src/components/Lineage/node/NodeAppendix.tsx create mode 100644 web/common/src/components/Lineage/node/NodeBadge.tsx create mode 100644 web/common/src/components/Lineage/node/NodeBase.tsx create mode 100644 web/common/src/components/Lineage/node/NodeContainer.tsx create mode 100644 web/common/src/components/Lineage/node/NodeDetail.tsx create mode 100644 web/common/src/components/Lineage/node/NodeDivider.tsx create mode 100644 web/common/src/components/Lineage/node/NodeHandle.tsx create mode 100644 web/common/src/components/Lineage/node/NodeHandleIcon.tsx create mode 100644 web/common/src/components/Lineage/node/NodeHandles.tsx create mode 100644 web/common/src/components/Lineage/node/NodeHeader.tsx create 
mode 100644 web/common/src/components/Lineage/node/NodePort.tsx create mode 100644 web/common/src/components/Lineage/node/NodePorts.tsx create mode 100644 web/common/src/components/Lineage/node/base-handle.tsx create mode 100644 web/common/src/components/Lineage/node/base-node.tsx create mode 100644 web/common/src/components/Lineage/node/useNodeMetadata.tsx create mode 100644 web/common/src/components/Lineage/stories/Lineage.stories.tsx create mode 100644 web/common/src/components/Lineage/stories/ModelLineage.tsx create mode 100644 web/common/src/components/Lineage/stories/ModelLineageContext.ts create mode 100644 web/common/src/components/Lineage/stories/ModelNode.tsx create mode 100644 web/common/src/components/Lineage/stories/ModelNodeColumn.tsx create mode 100644 web/common/src/components/Lineage/stories/dagreLayout.worker.ts create mode 100644 web/common/src/components/Lineage/stories/help.ts create mode 100644 web/common/src/components/Lineage/utils.ts create mode 100644 web/common/src/components/MessageContainer/MessageContainer.css create mode 100644 web/common/src/components/Metadata/Metadata.css create mode 100644 web/common/src/components/VirtualList/FilterableList.css create mode 100644 web/common/tailwind.lineage.config.js diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index daaf7eb993..2fec93a8f3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -400,124 +400,151 @@ importers: web/common: devDependencies: '@eslint/js': - specifier: ^9.31.0 + specifier: 9.31.0 version: 9.31.0 '@radix-ui/react-slot': - specifier: ^1.2.3 + specifier: 1.2.3 version: 1.2.3(@types/react@18.3.23)(react@18.3.1) '@radix-ui/react-tooltip': - specifier: ^1.2.8 + specifier: 1.2.8 version: 1.2.8(@types/react-dom@18.3.7(@types/react@18.3.23))(@types/react@18.3.23)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) '@storybook/addon-docs': - specifier: ^9.1.5 + specifier: 9.1.5 version: 9.1.5(@types/react@18.3.23)(storybook@9.1.5(@testing-library/dom@10.4.1)(prettier@3.6.2)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0))) '@storybook/react-vite': - specifier: ^9.1.5 + specifier: 9.1.5 version: 9.1.5(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(rollup@4.45.1)(storybook@9.1.5(@testing-library/dom@10.4.1)(prettier@3.6.2)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)))(typescript@5.8.3)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)) '@tailwindcss/typography': - specifier: ^0.5.16 + specifier: 0.5.16 version: 0.5.16(tailwindcss@3.4.17) '@tanstack/react-virtual': - specifier: ^3.13.12 + specifier: 3.13.12 version: 3.13.12(react-dom@18.3.1(react@18.3.1))(react@18.3.1) '@testing-library/dom': - specifier: ^10.4.1 + specifier: 10.4.1 version: 10.4.1 '@testing-library/jest-dom': - specifier: ^6.6.3 + specifier: 6.6.3 version: 6.6.3 '@testing-library/react': - specifier: ^16.3.0 + specifier: 16.3.0 version: 16.3.0(@testing-library/dom@10.4.1)(@types/react-dom@18.3.7(@types/react@18.3.23))(@types/react@18.3.23)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + '@testing-library/user-event': + specifier: 14.6.1 + version: 14.6.1(@testing-library/dom@10.4.1) + '@types/dagre': + specifier: 0.7.53 + version: 0.7.53 + '@types/lodash': + specifier: 4.17.20 + version: 4.17.20 '@types/node': - specifier: ^20.11.25 + specifier: 20.11.25 version: 20.11.25 '@types/react': - specifier: ^18.3.23 + specifier: 18.3.23 version: 18.3.23 '@types/react-dom': - specifier: 
^18.3.7 + specifier: 18.3.7 version: 18.3.7(@types/react@18.3.23) '@vitejs/plugin-react': - specifier: ^4.7.0 + specifier: 4.7.0 version: 4.7.0(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)) '@vitest/browser': - specifier: ^3.2.4 + specifier: 3.2.4 version: 3.2.4(playwright@1.54.1)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0))(vitest@3.2.4) '@xyflow/react': - specifier: ^12.8.4 + specifier: 12.8.4 version: 12.8.4(@types/react@18.3.23)(immer@9.0.21)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) autoprefixer: - specifier: ^10.4.21 + specifier: 10.4.21 version: 10.4.21(postcss@8.5.6) + browserslist: + specifier: 4.26.2 + version: 4.26.2 + caniuse-lite: + specifier: 1.0.30001746 + version: 1.0.30001746 class-variance-authority: - specifier: ^0.7.1 + specifier: 0.7.1 version: 0.7.1 clsx: - specifier: ^2.1.1 + specifier: 2.1.1 version: 2.1.1 + cronstrue: + specifier: 3.3.0 + version: 3.3.0 + dagre: + specifier: 0.8.5 + version: 0.8.5 + deepmerge: + specifier: 4.3.1 + version: 4.3.1 eslint: - specifier: ^9.31.0 + specifier: 9.31.0 version: 9.31.0(jiti@2.4.2) eslint-plugin-react-hooks: - specifier: ^5.2.0 + specifier: 5.2.0 version: 5.2.0(eslint@9.31.0(jiti@2.4.2)) eslint-plugin-storybook: - specifier: ^9.1.5 + specifier: 9.1.5 version: 9.1.5(eslint@9.31.0(jiti@2.4.2))(storybook@9.1.5(@testing-library/dom@10.4.1)(prettier@3.6.2)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)))(typescript@5.8.3) fuse.js: - specifier: ^7.1.0 + specifier: 7.1.0 version: 7.1.0 globals: - specifier: ^16.3.0 + specifier: 16.3.0 version: 16.3.0 + lodash: + specifier: 4.17.21 + version: 4.17.21 lucide-react: - specifier: ^0.542.0 + specifier: 0.542.0 version: 0.542.0(react@18.3.1) playwright: - specifier: ^1.54.1 + specifier: 1.54.1 version: 1.54.1 postcss: - specifier: ^8.5.6 + specifier: 8.5.6 version: 8.5.6 react: - specifier: ^18.3.1 + specifier: 18.3.1 version: 18.3.1 react-dom: - specifier: ^18.3.1 + specifier: 18.3.1 version: 18.3.1(react@18.3.1) storybook: - specifier: ^9.1.5 + specifier: 9.1.5 version: 9.1.5(@testing-library/dom@10.4.1)(prettier@3.6.2)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)) syncpack: - specifier: ^13.0.4 + specifier: 13.0.4 version: 13.0.4(typescript@5.8.3) tailwind-merge: - specifier: ^3.3.1 + specifier: 3.3.1 version: 3.3.1 tailwind-scrollbar: - specifier: ^3.1.0 + specifier: 3.1.0 version: 3.1.0(tailwindcss@3.4.17) tailwindcss: - specifier: ^3.4.17 + specifier: 3.4.17 version: 3.4.17 typescript: - specifier: ^5.8.3 + specifier: 5.8.3 version: 5.8.3 typescript-eslint: - specifier: ^8.38.0 + specifier: 8.38.0 version: 8.38.0(eslint@9.31.0(jiti@2.4.2))(typescript@5.8.3) vite: - specifier: ^6.3.5 + specifier: 6.3.5 version: 6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0) vite-plugin-dts: - specifier: ^4.5.4 + specifier: 4.5.4 version: 4.5.4(@types/node@20.11.25)(rollup@4.45.1)(typescript@5.8.3)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)) vite-plugin-static-copy: - specifier: ^3.1.1 + specifier: 3.1.1 version: 3.1.1(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)) vitest: - specifier: ^3.2.4 + specifier: 3.2.4 version: 
3.2.4(@types/debug@4.1.12)(@types/node@20.11.25)(@vitest/browser@3.2.4)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0) packages: @@ -766,8 +793,8 @@ packages: '@codemirror/autocomplete@6.18.6': resolution: {integrity: sha512-PHHBXFomUs5DF+9tCOM/UoW6XQ4R44lLNNhRaW9PKPTU0D7lIjRg3ElxaJnTwsl/oHiR93WSXDBrekhoUGCPtg==} - '@codemirror/autocomplete@6.18.7': - resolution: {integrity: sha512-8EzdeIoWPJDsMBwz3zdzwXnUpCzMiCyz5/A3FIPpriaclFCGDkAzK13sMcnsu5rowqiyeQN2Vs2TsOcoDPZirQ==} + '@codemirror/autocomplete@6.19.0': + resolution: {integrity: sha512-61Hfv3cF07XvUxNeC3E7jhG8XNi1Yom1G0lRC936oLnlF+jrbrv8rc/J98XlYzcsAoTVupfsf5fLej1aI8kyIg==} '@codemirror/commands@6.8.1': resolution: {integrity: sha512-KlGVYufHMQzxbdQONiLyGQDUW0itrLZwq3CcY7xpv9ZLRHqzkBSoteocBHtMCoY7/Ci4xhzSrToIeLg7FxHuaw==} @@ -802,8 +829,8 @@ packages: '@codemirror/view@6.38.1': resolution: {integrity: sha512-RmTOkE7hRU3OVREqFVITWHz6ocgBjv08GoePscAakgVQfciA3SGCEk7mb9IzwW61cKKmlTpHXG6DUE5Ubx+MGQ==} - '@codemirror/view@6.38.2': - resolution: {integrity: sha512-bTWAJxL6EOFLPzTx+O5P5xAO3gTqpatQ2b/ARQ8itfU/v2LlpS3pH2fkL0A3E/Fx8Y2St2KES7ZEV0sHTsSW/A==} + '@codemirror/view@6.38.4': + resolution: {integrity: sha512-hduz0suCcUSC/kM8Fq3A9iLwInJDl8fD1xLpTIk+5xkNm8z/FT7UsIa9sOXrkpChh+XXc18RzswE8QqELsVl+g==} '@csstools/color-helpers@5.0.2': resolution: {integrity: sha512-JqWH1vsgdGcw2RR6VliXXdA0/59LttzlU8UlRT/iUUsEeWfYq8I+K0yhihEUTTHLRm1EXvpsCx3083EU15ecsA==} @@ -1179,6 +1206,9 @@ packages: '@jridgewell/trace-mapping@0.3.30': resolution: {integrity: sha512-GQ7Nw5G2lTu/BtHTKfXhKHok2WGetd4XYcVKGx00SjAk8GMwgJM3zr6zORiPGuOE+/vkc90KtTosSSvaCjKb2Q==} + '@jridgewell/trace-mapping@0.3.31': + resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==} + '@jsdevtools/ono@7.1.3': resolution: {integrity: sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg==} @@ -2587,6 +2617,9 @@ packages: '@types/d3@7.4.3': resolution: {integrity: sha512-lZXZ9ckh5R8uiFVt8ogUNf+pIrK4EsWrx2Np75WvF/eTpJ0FMHNhjXk8CKEx/+gpHbNQyJWehbFaTvqmHWB3ww==} + '@types/dagre@0.7.53': + resolution: {integrity: sha512-f4gkWqzPZvYmKhOsDnhq/R8mO4UMcKdxZo+i5SCkOU1wvGeHJeUXGIHeE9pnwGyPMDof1Vx5ZQo4nxpeg2TTVQ==} + '@types/debug@4.1.12': resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==} @@ -2629,6 +2662,9 @@ packages: '@types/jsonfile@6.1.4': resolution: {integrity: sha512-D5qGUYwjvnNNextdU59/+fI+spnwtTFmyQP0h+PfIOSkNfpU6AOICUOkm4i0OnSk+NyjdPJrxCDro0sJsWlRpQ==} + '@types/lodash@4.17.20': + resolution: {integrity: sha512-H3MHACvFUEiujabxhaI/ImO6gUrd8oOurg7LQtS7mbwIXA/cUqWrvBsaeJ23aZEPk1TAYkurjfMbSELfoCXlGA==} + '@types/mdast@4.0.4': resolution: {integrity: sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==} @@ -3244,6 +3280,10 @@ packages: base64-js@1.5.1: resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} + baseline-browser-mapping@2.8.9: + resolution: {integrity: sha512-hY/u2lxLrbecMEWSB0IpGzGyDyeoMFQhCvZd2jGFSE5I17Fh01sYUBPCJtkWERw7zrac9+cIghxm/ytJa2X8iA==} + hasBin: true + better-opn@3.0.2: resolution: {integrity: sha512-aVNobHnJqLiUelTaHat9DZ1qM2w0C0Eym4LPI/3JxOnSokGVdsl1T1kN7TFvsEAD8G47A6VKQ0TVHqbBnYMJlQ==} engines: {node: '>=12.0.0'} @@ -3278,13 +3318,8 @@ packages: browser-stdout@1.3.1: resolution: {integrity: 
sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==} - browserslist@4.25.1: - resolution: {integrity: sha512-KGj0KoOMXLpSNkkEI6Z6mShmQy0bc1I+T7K9N81k4WWMrfz+6fQ6es80B/YLAeRoKvjYE1YSHHOW1qe9xIVzHw==} - engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7} - hasBin: true - - browserslist@4.25.4: - resolution: {integrity: sha512-4jYpcjabC606xJ3kw2QwGEZKX0Aw7sgQdZCvIK9dhVSPh76BKo+C+btT1RRofH7B+8iNpEbgGNVWiLki5q93yg==} + browserslist@4.26.2: + resolution: {integrity: sha512-ECFzp6uFOSB+dcZ5BK/IBaGWssbSYBHvuMeMt3MMFyhI0Z8SqGgEkBLARgpRH3hutIgPVsALcMwbDrJqPxQ65A==} engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7} hasBin: true @@ -3340,11 +3375,8 @@ packages: resolution: {integrity: sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==} engines: {node: '>=10'} - caniuse-lite@1.0.30001727: - resolution: {integrity: sha512-pB68nIHmbN6L/4C6MH1DokyR3bYqFwjaSs/sWDHGj4CTcFtQUQMuJftVwWkXq7mNWOybD3KhUv3oWHoGxgP14Q==} - - caniuse-lite@1.0.30001741: - resolution: {integrity: sha512-QGUGitqsc8ARjLdgAfxETDhRbJ0REsP6O3I96TAth/mVjh2cYzN2u+3AzPP3aVSm2FehEItaJw1xd+IGBXWeSw==} + caniuse-lite@1.0.30001746: + resolution: {integrity: sha512-eA7Ys/DGw+pnkWWSE/id29f2IcPHVoE8wxtvE5JdvD2V28VTDPy1yEeo11Guz0sJ4ZeGRcm3uaTcAqK1LXaphA==} ccount@2.0.1: resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==} @@ -3538,6 +3570,10 @@ packages: crelt@1.0.6: resolution: {integrity: sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==} + cronstrue@3.3.0: + resolution: {integrity: sha512-iwJytzJph1hosXC09zY8F5ACDJKerr0h3/2mOxg9+5uuFObYlgK0m35uUPk4GCvhHc2abK7NfnR9oMqY0qZFAg==} + hasBin: true + cross-spawn@7.0.6: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} @@ -3602,6 +3638,9 @@ packages: resolution: {integrity: sha512-b8AmV3kfQaqWAuacbPuNbL6vahnOJflOhexLzMMNLga62+/nh0JzvJ0aO/5a5MVgUFGS7Hu1P9P03o3fJkDCyw==} engines: {node: '>=12'} + dagre@0.8.5: + resolution: {integrity: sha512-/aTqmnRta7x7MCCpExk7HQL2O4owCT2h8NT//9I1OQ9vt29Pa0BzSAkR5lwFUcQ7491yVi/3CXU9jQ5o0Mn2Sw==} + data-urls@5.0.0: resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==} engines: {node: '>=18'} @@ -3655,6 +3694,10 @@ packages: deep-is@0.1.4: resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==} + deepmerge@4.3.1: + resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==} + engines: {node: '>=0.10.0'} + default-browser-id@5.0.0: resolution: {integrity: sha512-A6p/pu/6fyBcA1TRz/GqWYPViplrftcW2gZC9q79ngNCKAeR/X3gcEdXQHl4KNXV+3wgIJ1CPkJQ3IHM6lcsyA==} engines: {node: '>=18'} @@ -3762,11 +3805,8 @@ packages: effect@3.17.9: resolution: {integrity: sha512-Nkkn9n1zhy30Dq0MpQatDCH7nfYnOIiebkOHNxmmvoVnEDKCto+2ZwDDWFGzcN/ojwfqjRXWGC9Lo91K5kwZCg==} - electron-to-chromium@1.5.190: - resolution: {integrity: sha512-k4McmnB2091YIsdCgkS0fMVMPOJgxl93ltFzaryXqwip1AaxeDqKCGLxkXODDA5Ab/D+tV5EL5+aTx76RvLRxw==} - - electron-to-chromium@1.5.215: - resolution: {integrity: sha512-TIvGp57UpeNetj/wV/xpFNpWGb0b/ROw372lHPx5Aafx02gjTBtWnEEcaSX3W2dLM3OSdGGyHX/cHl01JQsLaQ==} + electron-to-chromium@1.5.227: + resolution: {integrity: 
sha512-ITxuoPfJu3lsNWUi2lBM2PaBPYgH3uqmxut5vmBxgYvyI4AlJ6P3Cai1O76mOrkJCBzq0IxWg/NtqOrpu/0gKA==} elkjs@0.8.2: resolution: {integrity: sha512-L6uRgvZTH+4OF5NE/MBbzQx/WYpru1xCBE9respNj6qznEewGUIfhzmm7horWWxbNO2M0WckQypGctR8lH79xQ==} @@ -4209,6 +4249,9 @@ packages: graphemer@1.4.0: resolution: {integrity: sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==} + graphlib@2.1.8: + resolution: {integrity: sha512-jcLLfkpoVGmH7/InMC/1hIvOPSUh38oJtGhvrOFGzioE1DZ+0YW16RgmOJhHiuWTvGiJQ9Z1Ik43JvkRPRvE+A==} + has-bigints@1.1.0: resolution: {integrity: sha512-R3pbpkcIqv2Pm3dUwgjclDRVmWpTJW2DcMzcIhEXEx1oh/CEMObMm3KLmRJOdvhM7o4uQBnwr8pzRK2sJWIqfg==} engines: {node: '>= 0.4'} @@ -5139,11 +5182,8 @@ packages: node-readfiles@0.2.0: resolution: {integrity: sha512-SU00ZarexNlE4Rjdm83vglt5Y9yiQ+XI1XpflWlb7q7UTN1JUItm69xMeiQCTxtTfnzt+83T8Cx+vI2ED++VDA==} - node-releases@2.0.19: - resolution: {integrity: sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==} - - node-releases@2.0.20: - resolution: {integrity: sha512-7gK6zSXEH6neM212JgfYFXe+GmZQM+fia5SsusuBIUgnPheLFBmIPhtFoAQRj8/7wASYQnbDlHPVwY0BefoFgA==} + node-releases@2.0.21: + resolution: {integrity: sha512-5b0pgg78U3hwXkCM8Z9b2FJdPZlr9Psr9V2gQPESdGHqbntyFJKFW4r5TeWGFzafGY3hzs1JC62VEQMbl1JFkw==} node-sarif-builder@3.2.0: resolution: {integrity: sha512-kVIOdynrF2CRodHZeP/97Rh1syTUHBNiw17hUCIVhlhEsWlfJm19MuO56s4MdKbr22xWx6mzMnNAgXzVlIYM9Q==} @@ -7047,7 +7087,7 @@ snapshots: dependencies: '@babel/compat-data': 7.28.0 '@babel/helper-validator-option': 7.27.1 - browserslist: 4.25.1 + browserslist: 4.26.2 lru-cache: 5.1.1 semver: 6.3.1 @@ -7224,11 +7264,11 @@ snapshots: '@codemirror/view': 6.38.1 '@lezer/common': 1.2.3 - '@codemirror/autocomplete@6.18.7': + '@codemirror/autocomplete@6.19.0': dependencies: '@codemirror/language': 6.11.3 '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + '@codemirror/view': 6.38.4 '@lezer/common': 1.2.3 '@codemirror/commands@6.8.1': @@ -7267,7 +7307,7 @@ snapshots: '@codemirror/language@6.11.3': dependencies: '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + '@codemirror/view': 6.38.4 '@lezer/common': 1.2.3 '@lezer/highlight': 1.2.1 '@lezer/lr': 1.4.2 @@ -7280,13 +7320,13 @@ snapshots: '@codemirror/lint@6.8.5': dependencies: '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + '@codemirror/view': 6.38.4 crelt: 1.0.6 '@codemirror/search@6.5.10': dependencies: '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + '@codemirror/view': 6.38.4 crelt: 1.0.6 '@codemirror/state@6.5.2': @@ -7297,7 +7337,7 @@ snapshots: dependencies: '@codemirror/language': 6.11.3 '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + '@codemirror/view': 6.38.4 '@lezer/highlight': 1.2.1 '@codemirror/view@6.38.1': @@ -7307,7 +7347,7 @@ snapshots: style-mod: 4.1.2 w3c-keyname: 2.2.8 - '@codemirror/view@6.38.2': + '@codemirror/view@6.38.4': dependencies: '@codemirror/state': 6.5.2 crelt: 1.0.6 @@ -7617,7 +7657,7 @@ snapshots: '@jridgewell/source-map@0.3.11': dependencies: '@jridgewell/gen-mapping': 0.3.13 - '@jridgewell/trace-mapping': 0.3.30 + '@jridgewell/trace-mapping': 0.3.31 '@jridgewell/sourcemap-codec@1.5.4': {} @@ -7633,6 +7673,11 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.5 + '@jridgewell/trace-mapping@0.3.31': + dependencies: + '@jridgewell/resolve-uri': 3.1.2 + '@jridgewell/sourcemap-codec': 1.5.5 + '@jsdevtools/ono@7.1.3': {} '@jsep-plugin/assignment@1.3.0(jsep@1.4.0)': @@ -9331,6 +9376,8 @@ snapshots: 
'@types/d3-transition': 3.0.9 '@types/d3-zoom': 3.0.8 + '@types/dagre@0.7.53': {} + '@types/debug@4.1.12': dependencies: '@types/ms': 2.1.0 @@ -9378,6 +9425,8 @@ snapshots: dependencies: '@types/node': 20.11.25 + '@types/lodash@4.17.20': {} + '@types/mdast@4.0.4': dependencies: '@types/unist': 3.0.3 @@ -10173,8 +10222,8 @@ snapshots: autoprefixer@10.4.21(postcss@8.5.6): dependencies: - browserslist: 4.25.1 - caniuse-lite: 1.0.30001727 + browserslist: 4.26.2 + caniuse-lite: 1.0.30001746 fraction.js: 4.3.7 normalize-range: 0.1.2 picocolors: 1.1.1 @@ -10208,6 +10257,8 @@ snapshots: base64-js@1.5.1: optional: true + baseline-browser-mapping@2.8.9: {} + better-opn@3.0.2: dependencies: open: 8.4.2 @@ -10244,19 +10295,13 @@ snapshots: browser-stdout@1.3.1: {} - browserslist@4.25.1: - dependencies: - caniuse-lite: 1.0.30001727 - electron-to-chromium: 1.5.190 - node-releases: 2.0.19 - update-browserslist-db: 1.1.3(browserslist@4.25.1) - - browserslist@4.25.4: + browserslist@4.26.2: dependencies: - caniuse-lite: 1.0.30001741 - electron-to-chromium: 1.5.215 - node-releases: 2.0.20 - update-browserslist-db: 1.1.3(browserslist@4.25.4) + baseline-browser-mapping: 2.8.9 + caniuse-lite: 1.0.30001746 + electron-to-chromium: 1.5.227 + node-releases: 2.0.21 + update-browserslist-db: 1.1.3(browserslist@4.26.2) buffer-crc32@0.2.13: {} @@ -10315,9 +10360,7 @@ snapshots: camelcase@6.3.0: {} - caniuse-lite@1.0.30001727: {} - - caniuse-lite@1.0.30001741: {} + caniuse-lite@1.0.30001746: {} ccount@2.0.1: {} @@ -10437,13 +10480,13 @@ snapshots: codemirror@6.0.1: dependencies: - '@codemirror/autocomplete': 6.18.7 + '@codemirror/autocomplete': 6.19.0 '@codemirror/commands': 6.8.1 '@codemirror/language': 6.11.3 '@codemirror/lint': 6.8.5 '@codemirror/search': 6.5.10 '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + '@codemirror/view': 6.38.4 color-convert@2.0.1: dependencies: @@ -10506,6 +10549,8 @@ snapshots: crelt@1.0.6: {} + cronstrue@3.3.0: {} + cross-spawn@7.0.6: dependencies: path-key: 3.1.1 @@ -10569,6 +10614,11 @@ snapshots: d3-selection: 3.0.0 d3-transition: 3.0.1(d3-selection@3.0.0) + dagre@0.8.5: + dependencies: + graphlib: 2.1.8 + lodash: 4.17.21 + data-urls@5.0.0: dependencies: whatwg-mimetype: 4.0.0 @@ -10620,6 +10670,8 @@ snapshots: deep-is@0.1.4: {} + deepmerge@4.3.1: {} + default-browser-id@5.0.0: {} default-browser@5.2.1: @@ -10722,9 +10774,7 @@ snapshots: '@standard-schema/spec': 1.0.0 fast-check: 3.23.2 - electron-to-chromium@1.5.190: {} - - electron-to-chromium@1.5.215: {} + electron-to-chromium@1.5.227: {} elkjs@0.8.2: {} @@ -11274,6 +11324,10 @@ snapshots: graphemer@1.4.0: {} + graphlib@2.1.8: + dependencies: + lodash: 4.17.21 + has-bigints@1.1.0: {} has-flag@4.0.0: {} @@ -12304,9 +12358,7 @@ snapshots: dependencies: es6-promise: 3.3.1 - node-releases@2.0.19: {} - - node-releases@2.0.20: {} + node-releases@2.0.21: {} node-sarif-builder@3.2.0: dependencies: @@ -13427,7 +13479,7 @@ snapshots: sucrase@3.35.0: dependencies: - '@jridgewell/gen-mapping': 0.3.12 + '@jridgewell/gen-mapping': 0.3.13 commander: 4.1.1 glob: 10.4.5 lines-and-columns: 1.2.4 @@ -13579,7 +13631,7 @@ snapshots: terser-webpack-plugin@5.3.14(esbuild@0.25.8)(webpack@5.99.8(esbuild@0.25.8)): dependencies: - '@jridgewell/trace-mapping': 0.3.30 + '@jridgewell/trace-mapping': 0.3.31 jest-worker: 27.5.1 schema-utils: 4.3.2 serialize-javascript: 6.0.2 @@ -13871,15 +13923,9 @@ snapshots: picomatch: 4.0.3 webpack-virtual-modules: 0.6.2 - update-browserslist-db@1.1.3(browserslist@4.25.1): - dependencies: - browserslist: 4.25.1 - 
escalade: 3.2.0 - picocolors: 1.1.1 - - update-browserslist-db@1.1.3(browserslist@4.25.4): + update-browserslist-db@1.1.3(browserslist@4.26.2): dependencies: - browserslist: 4.25.4 + browserslist: 4.26.2 escalade: 3.2.0 picocolors: 1.1.1 @@ -14235,7 +14281,7 @@ snapshots: '@webassemblyjs/wasm-edit': 1.14.1 '@webassemblyjs/wasm-parser': 1.14.1 acorn: 8.15.0 - browserslist: 4.25.4 + browserslist: 4.26.2 chrome-trace-event: 1.0.4 enhanced-resolve: 5.18.3 es-module-lexer: 1.7.0 diff --git a/web/common/.storybook/main.ts b/web/common/.storybook/main.ts index 8994b8a737..e916ea6f64 100644 --- a/web/common/.storybook/main.ts +++ b/web/common/.storybook/main.ts @@ -2,7 +2,7 @@ import type { StorybookConfig } from '@storybook/react-vite' const config: StorybookConfig = { stories: ['../src/**/*.mdx', '../src/**/*.stories.@(js|jsx|mjs|ts|tsx)'], - addons: ['@storybook/addon-docs', '@storybook/addon-onboarding'], + addons: ['@storybook/addon-docs'], framework: { name: '@storybook/react-vite', options: {}, diff --git a/web/common/.syncpackrc b/web/common/.syncpackrc index 52d97009ce..edc87cc315 100644 --- a/web/common/.syncpackrc +++ b/web/common/.syncpackrc @@ -14,7 +14,7 @@ ], "semverGroups": [ { - "label": "Use caret ranges for all dependencies", + "label": "Use exact versions for all dependencies", "dependencies": [ "**" ], @@ -23,7 +23,7 @@ "peer", "prod" ], - "range": "^" + "range": "" } ] } \ No newline at end of file diff --git a/web/common/package-lock.json b/web/common/package-lock.json deleted file mode 100644 index eaaaee941b..0000000000 --- a/web/common/package-lock.json +++ /dev/null @@ -1,7183 +0,0 @@ -{ - "name": "@tobikodata/sqlmesh-common", - "version": "0.0.1", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "@tobikodata/sqlmesh-common", - "version": "0.0.1", - "license": "Apache-2.0", - "devDependencies": { - "@eslint/js": "^9.31.0", - "@radix-ui/react-dialog": "^1.1.15", - "@radix-ui/react-dropdown-menu": "^2.1.16", - "@radix-ui/react-popover": "^1.1.15", - "@radix-ui/react-slot": "^1.2.3", - "@radix-ui/react-tooltip": "^1.2.8", - "@storybook/addon-docs": "^9.1.5", - "@storybook/addon-onboarding": "^9.1.5", - "@storybook/react-vite": "^9.1.5", - "@tailwindcss/typography": "^0.5.16", - "@tanstack/react-virtual": "^3.13.12", - "@testing-library/dom": "^10.4.1", - "@testing-library/jest-dom": "^6.6.3", - "@testing-library/react": "^16.3.0", - "@types/node": "^20.11.25", - "@types/react": "^18.3.23", - "@types/react-dom": "^18.3.7", - "@vitejs/plugin-react": "^4.7.0", - "@vitest/browser": "^3.2.4", - "@xyflow/react": "^12.8.4", - "autoprefixer": "^10.4.21", - "class-variance-authority": "^0.7.1", - "clsx": "^2.1.1", - "cmdk": "^1.1.1", - "eslint": "^9.31.0", - "eslint-plugin-react-hooks": "^5.2.0", - "eslint-plugin-storybook": "^9.1.5", - "fuse.js": "^7.1.0", - "globals": "^16.3.0", - "lucide-react": "^0.542.0", - "playwright": "^1.54.1", - "postcss": "^8.5.6", - "react": "^18.3.1", - "react-dom": "^18.3.1", - "storybook": "^9.1.5", - "syncpack": "^13.0.4", - "tailwind-merge": "^3.3.1", - "tailwind-scrollbar": "^4.0.2", - "tailwindcss": "^3.4.17", - "typescript": "^5.8.3", - "typescript-eslint": "^8.38.0", - "vite": "^6.3.5", - "vite-plugin-dts": "^4.5.4", - "vite-plugin-static-copy": "^3.1.1", - "vitest": "^3.2.4" - }, - "peerDependencies": { - "@radix-ui/react-dialog": "^1.1.15", - "@radix-ui/react-dropdown-menu": "^2.1.16", - "@radix-ui/react-popover": "^1.1.15", - "@radix-ui/react-slot": "^1.2.3", - "@radix-ui/react-tooltip": "^1.2.8", - 
"@tailwindcss/typography": "^0.5.16", - "@tanstack/react-virtual": "^3.13.12", - "@xyflow/react": "^12.8.4", - "class-variance-authority": "^0.7.1", - "clsx": "^2.1.1", - "cmdk": "^1.1.1", - "fuse.js": "^7.1.0", - "lucide-react": "^0.542.0", - "react": "^18.3.1", - "react-dom": "^18.3.1", - "tailwind-merge": "^3.3.1", - "tailwindcss": "^3.4.17" - } - }, - "../../node_modules/.pnpm/@eslint+js@9.31.0/node_modules/@eslint/js": { - "version": "9.31.0", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://eslint.org/donate" - } - }, - "../../node_modules/.pnpm/@vitejs+plugin-react@4.7.0_vite@6.3.5_@types+node@24.1.0_jiti@2.4.2_lightningcss@1.30.1_terse_p5zuafkpgv2vlm3nhxz3zj4hsu/node_modules/@vitejs/plugin-react": { - "version": "4.7.0", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/core": "^7.28.0", - "@babel/plugin-transform-react-jsx-self": "^7.27.1", - "@babel/plugin-transform-react-jsx-source": "^7.27.1", - "@rolldown/pluginutils": "1.0.0-beta.27", - "@types/babel__core": "^7.20.5", - "react-refresh": "^0.17.0" - }, - "devDependencies": { - "@vitejs/react-common": "workspace:*", - "babel-plugin-react-compiler": "19.1.0-rc.2", - "react": "^19.1.0", - "react-dom": "^19.1.0", - "rolldown": "1.0.0-beta.27", - "tsdown": "^0.12.9", - "vitest": "^3.2.4" - }, - "engines": { - "node": "^14.18.0 || >=16.0.0" - }, - "peerDependencies": { - "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" - } - }, - "../../node_modules/.pnpm/eslint@9.31.0_jiti@2.4.2/node_modules/eslint": { - "version": "9.31.0", - "dev": true, - "license": "MIT", - "dependencies": { - "@eslint-community/eslint-utils": "^4.2.0", - "@eslint-community/regexpp": "^4.12.1", - "@eslint/config-array": "^0.21.0", - "@eslint/config-helpers": "^0.3.0", - "@eslint/core": "^0.15.0", - "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.31.0", - "@eslint/plugin-kit": "^0.3.1", - "@humanfs/node": "^0.16.6", - "@humanwhocodes/module-importer": "^1.0.1", - "@humanwhocodes/retry": "^0.4.2", - "@types/estree": "^1.0.6", - "@types/json-schema": "^7.0.15", - "ajv": "^6.12.4", - "chalk": "^4.0.0", - "cross-spawn": "^7.0.6", - "debug": "^4.3.2", - "escape-string-regexp": "^4.0.0", - "eslint-scope": "^8.4.0", - "eslint-visitor-keys": "^4.2.1", - "espree": "^10.4.0", - "esquery": "^1.5.0", - "esutils": "^2.0.2", - "fast-deep-equal": "^3.1.3", - "file-entry-cache": "^8.0.0", - "find-up": "^5.0.0", - "glob-parent": "^6.0.2", - "ignore": "^5.2.0", - "imurmurhash": "^0.1.4", - "is-glob": "^4.0.0", - "json-stable-stringify-without-jsonify": "^1.0.1", - "lodash.merge": "^4.6.2", - "minimatch": "^3.1.2", - "natural-compare": "^1.4.0", - "optionator": "^0.9.3" - }, - "bin": { - "eslint": "bin/eslint.js" - }, - "devDependencies": { - "@arethetypeswrong/cli": "^0.18.0", - "@babel/core": "^7.4.3", - "@babel/preset-env": "^7.4.3", - "@cypress/webpack-preprocessor": "^6.0.2", - "@eslint/json": "^0.13.0", - "@trunkio/launcher": "^1.3.4", - "@types/esquery": "^1.5.4", - "@types/node": "^22.13.14", - "@typescript-eslint/parser": "^8.4.0", - "babel-loader": "^8.0.5", - "c8": "^7.12.0", - "chai": "^4.0.1", - "cheerio": "^0.22.0", - "common-tags": "^1.8.0", - "core-js": "^3.1.3", - "cypress": "^14.1.0", - "ejs": "^3.0.2", - "eslint": "file:.", - "eslint-config-eslint": "file:packages/eslint-config-eslint", - "eslint-plugin-eslint-plugin": "^6.0.0", - "eslint-plugin-expect-type": "^0.6.0", - "eslint-plugin-yml": "^1.14.0", - "eslint-release": "^3.3.0", - "eslint-rule-composer": 
"^0.3.0", - "eslump": "^3.0.0", - "esprima": "^4.0.1", - "fast-glob": "^3.2.11", - "fs-teardown": "^0.1.3", - "glob": "^10.0.0", - "globals": "^16.2.0", - "got": "^11.8.3", - "gray-matter": "^4.0.3", - "jiti": "^2.2.0", - "jiti-v2.0": "npm:jiti@2.0.x", - "jiti-v2.1": "npm:jiti@2.1.x", - "knip": "^5.60.2", - "lint-staged": "^11.0.0", - "load-perf": "^0.2.0", - "markdown-it": "^12.2.0", - "markdown-it-container": "^3.0.0", - "marked": "^4.0.8", - "metascraper": "^5.25.7", - "metascraper-description": "^5.25.7", - "metascraper-image": "^5.29.3", - "metascraper-logo": "^5.25.7", - "metascraper-logo-favicon": "^5.25.7", - "metascraper-title": "^5.25.7", - "mocha": "^11.7.1", - "node-polyfill-webpack-plugin": "^1.0.3", - "npm-license": "^0.3.3", - "pirates": "^4.0.5", - "progress": "^2.0.3", - "proxyquire": "^2.0.1", - "recast": "^0.23.0", - "regenerator-runtime": "^0.14.0", - "semver": "^7.5.3", - "shelljs": "^0.10.0", - "sinon": "^11.0.0", - "typescript": "^5.3.3", - "webpack": "^5.23.0", - "webpack-cli": "^4.5.0", - "yorkie": "^2.0.0" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://eslint.org/donate" - }, - "peerDependencies": { - "jiti": "*" - }, - "peerDependenciesMeta": { - "jiti": { - "optional": true - } - } - }, - "../../node_modules/.pnpm/typescript-eslint@8.38.0_eslint@9.31.0_jiti@2.4.2__typescript@5.8.3/node_modules/typescript-eslint": { - "version": "8.38.0", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/eslint-plugin": "8.38.0", - "@typescript-eslint/parser": "8.38.0", - "@typescript-eslint/typescript-estree": "8.38.0", - "@typescript-eslint/utils": "8.38.0" - }, - "devDependencies": { - "@vitest/coverage-v8": "^3.1.3", - "eslint": "*", - "rimraf": "*", - "typescript": "*", - "vitest": "^3.1.3" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <5.9.0" - } - }, - "../../node_modules/.pnpm/typescript@5.8.3/node_modules/typescript": { - "version": "5.8.3", - "dev": true, - "license": "Apache-2.0", - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "devDependencies": { - "@dprint/formatter": "^0.4.1", - "@dprint/typescript": "0.93.3", - "@esfx/canceltoken": "^1.0.0", - "@eslint/js": "^9.17.0", - "@octokit/rest": "^21.0.2", - "@types/chai": "^4.3.20", - "@types/diff": "^5.2.3", - "@types/minimist": "^1.2.5", - "@types/mocha": "^10.0.10", - "@types/ms": "^0.7.34", - "@types/node": "latest", - "@types/source-map-support": "^0.5.10", - "@types/which": "^3.0.4", - "@typescript-eslint/rule-tester": "^8.18.1", - "@typescript-eslint/type-utils": "^8.18.1", - "@typescript-eslint/utils": "^8.18.1", - "azure-devops-node-api": "^14.1.0", - "c8": "^10.1.3", - "chai": "^4.5.0", - "chalk": "^4.1.2", - "chokidar": "^3.6.0", - "diff": "^5.2.0", - "dprint": "^0.47.6", - "esbuild": "^0.24.0", - "eslint": "^9.17.0", - "eslint-formatter-autolinkable-stylish": "^1.4.0", - "eslint-plugin-regexp": "^2.7.0", - "fast-xml-parser": "^4.5.1", - "glob": "^10.4.5", - "globals": "^15.13.0", - "hereby": "^1.10.0", - "jsonc-parser": "^3.3.1", - "knip": "^5.41.0", - "minimist": "^1.2.8", - "mocha": "^10.8.2", - "mocha-fivemat-progress-reporter": "^0.1.0", - "monocart-coverage-reports": "^2.11.4", - "ms": "^2.1.3", - "playwright": "^1.49.1", - "source-map-support": "^0.5.21", - "tslib": "^2.8.1", - "typescript": 
"^5.7.2", - "typescript-eslint": "^8.18.1", - "which": "^3.0.1" - }, - "engines": { - "node": ">=14.17" - } - }, - "../../node_modules/.pnpm/vite-plugin-dts@4.5.4_@types+node@24.1.0_rollup@4.45.1_typescript@5.8.3_vite@6.3.5_@types+nod_ddgp24sr5pf6ze3b5hs7mrzr5e/node_modules/vite-plugin-dts": { - "version": "4.5.4", - "dev": true, - "license": "MIT", - "dependencies": { - "@microsoft/api-extractor": "^7.50.1", - "@rollup/pluginutils": "^5.1.4", - "@volar/typescript": "^2.4.11", - "@vue/language-core": "2.2.0", - "compare-versions": "^6.1.1", - "debug": "^4.4.0", - "kolorist": "^1.8.0", - "local-pkg": "^1.0.0", - "magic-string": "^0.30.17" - }, - "devDependencies": { - "@commitlint/cli": "^19.7.1", - "@types/debug": "^4.1.12", - "@types/minimist": "^1.2.5", - "@types/node": "^22.13.5", - "@types/prompts": "^2.4.9", - "@types/semver": "^7.5.8", - "@vexip-ui/commitlint-config": "^0.5.0", - "@vexip-ui/eslint-config": "^0.12.1", - "@vexip-ui/prettier-config": "^1.0.0", - "@vexip-ui/scripts": "^1.2.0", - "@vue/eslint-config-standard": "^8.0.1", - "@vue/eslint-config-typescript": "^13.0.0", - "conventional-changelog-cli": "^5.0.0", - "eslint": "^8.57.0", - "execa": "^9.5.2", - "husky": "^9.1.7", - "is-ci": "^4.1.0", - "lint-staged": "^15.4.3", - "minimist": "^1.2.8", - "pinst": "^3.0.0", - "prettier": "^3.5.2", - "pretty-quick": "^4.0.0", - "prompts": "^2.4.2", - "rimraf": "^6.0.1", - "semver": "^7.7.1", - "tsx": "^4.19.3", - "typescript": "5.7.3", - "unbuild": "^3.3.1", - "vite": "^6.2.0", - "vitest": "^3.0.7" - }, - "peerDependencies": { - "typescript": "*", - "vite": "*" - }, - "peerDependenciesMeta": { - "vite": { - "optional": true - } - } - }, - "../../node_modules/.pnpm/vite@6.3.5_@types+node@24.1.0_jiti@2.4.2_lightningcss@1.30.1_terser@5.43.1_tsx@4.20.3_yaml@2.8.0/node_modules/vite": { - "version": "6.3.5", - "dev": true, - "license": "MIT", - "dependencies": { - "esbuild": "^0.25.0", - "fdir": "^6.4.4", - "picomatch": "^4.0.2", - "postcss": "^8.5.3", - "rollup": "^4.34.9", - "tinyglobby": "^0.2.13" - }, - "bin": { - "vite": "bin/vite.js" - }, - "devDependencies": { - "@ampproject/remapping": "^2.3.0", - "@babel/parser": "^7.27.0", - "@jridgewell/trace-mapping": "^0.3.25", - "@polka/compression": "^1.0.0-next.25", - "@rollup/plugin-alias": "^5.1.1", - "@rollup/plugin-commonjs": "^28.0.3", - "@rollup/plugin-dynamic-import-vars": "2.1.4", - "@rollup/plugin-json": "^6.1.0", - "@rollup/plugin-node-resolve": "16.0.1", - "@rollup/pluginutils": "^5.1.4", - "@types/escape-html": "^1.0.4", - "@types/pnpapi": "^0.0.5", - "artichokie": "^0.3.1", - "cac": "^6.7.14", - "chokidar": "^3.6.0", - "connect": "^3.7.0", - "convert-source-map": "^2.0.0", - "cors": "^2.8.5", - "cross-spawn": "^7.0.6", - "debug": "^4.4.0", - "dep-types": "link:./src/types", - "dotenv": "^16.5.0", - "dotenv-expand": "^12.0.2", - "es-module-lexer": "^1.6.0", - "escape-html": "^1.0.3", - "estree-walker": "^3.0.3", - "etag": "^1.8.1", - "http-proxy": "^1.18.1", - "launch-editor-middleware": "^2.10.0", - "lightningcss": "^1.29.3", - "magic-string": "^0.30.17", - "mlly": "^1.7.4", - "mrmime": "^2.0.1", - "nanoid": "^5.1.5", - "open": "^10.1.1", - "parse5": "^7.2.1", - "pathe": "^2.0.3", - "periscopic": "^4.0.2", - "picocolors": "^1.1.1", - "postcss-import": "^16.1.0", - "postcss-load-config": "^6.0.1", - "postcss-modules": "^6.0.1", - "resolve.exports": "^2.0.3", - "rollup-plugin-dts": "^6.2.1", - "rollup-plugin-esbuild": "^6.2.1", - "rollup-plugin-license": "^3.6.0", - "sass": "^1.86.3", - "sass-embedded": "^1.86.3", - "sirv": 
"^3.0.1", - "source-map-support": "^0.5.21", - "strip-literal": "^3.0.0", - "terser": "^5.39.0", - "tsconfck": "^3.1.5", - "tslib": "^2.8.1", - "types": "link:./types", - "ufo": "^1.6.1", - "ws": "^8.18.1" - }, - "engines": { - "node": "^18.0.0 || ^20.0.0 || >=22.0.0" - }, - "funding": { - "url": "https://github.com/vitejs/vite?sponsor=1" - }, - "optionalDependencies": { - "fsevents": "~2.3.3" - }, - "peerDependencies": { - "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", - "jiti": ">=1.21.0", - "less": "*", - "lightningcss": "^1.21.0", - "sass": "*", - "sass-embedded": "*", - "stylus": "*", - "sugarss": "*", - "terser": "^5.16.0", - "tsx": "^4.8.1", - "yaml": "^2.4.2" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - }, - "jiti": { - "optional": true - }, - "less": { - "optional": true - }, - "lightningcss": { - "optional": true - }, - "sass": { - "optional": true - }, - "sass-embedded": { - "optional": true - }, - "stylus": { - "optional": true - }, - "sugarss": { - "optional": true - }, - "terser": { - "optional": true - }, - "tsx": { - "optional": true - }, - "yaml": { - "optional": true - } - } - }, - "node_modules/@adobe/css-tools": { - "version": "4.4.4", - "resolved": "https://registry.npmjs.org/@adobe/css-tools/-/css-tools-4.4.4.tgz", - "integrity": "sha512-Elp+iwUx5rN5+Y8xLt5/GRoG20WGoDCQ/1Fb+1LiGtvwbDavuSk0jhD/eZdckHAuzcDzccnkv+rEjyWfRx18gg==", - "dev": true, - "license": "MIT" - }, - "node_modules/@alloc/quick-lru": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz", - "integrity": "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/@ampproject/remapping": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", - "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "@jridgewell/gen-mapping": "^0.3.5", - "@jridgewell/trace-mapping": "^0.3.24" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@babel/code-frame": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz", - "integrity": "sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-validator-identifier": "^7.27.1", - "js-tokens": "^4.0.0", - "picocolors": "^1.1.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/compat-data": { - "version": "7.28.0", - "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.28.0.tgz", - "integrity": "sha512-60X7qkglvrap8mn1lh2ebxXdZYtUcpd7gsmy9kLaBJ4i/WdY8PqTSdxyA8qraikqKQK5C1KRBKXqznrVapyNaw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/core": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.3.tgz", - "integrity": "sha512-yDBHV9kQNcr2/sUr9jghVyz9C3Y5G2zUM2H2lo+9mKv4sFgbA8s8Z9t8D1jiTkGoO/NoIfKMyKWr4s6CN23ZwQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@ampproject/remapping": "^2.2.0", - "@babel/code-frame": "^7.27.1", - "@babel/generator": "^7.28.3", - 
"@babel/helper-compilation-targets": "^7.27.2", - "@babel/helper-module-transforms": "^7.28.3", - "@babel/helpers": "^7.28.3", - "@babel/parser": "^7.28.3", - "@babel/template": "^7.27.2", - "@babel/traverse": "^7.28.3", - "@babel/types": "^7.28.2", - "convert-source-map": "^2.0.0", - "debug": "^4.1.0", - "gensync": "^1.0.0-beta.2", - "json5": "^2.2.3", - "semver": "^6.3.1" - }, - "engines": { - "node": ">=6.9.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/babel" - } - }, - "node_modules/@babel/core/node_modules/semver": { - "version": "6.3.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", - "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "dev": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/@babel/generator": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.28.3.tgz", - "integrity": "sha512-3lSpxGgvnmZznmBkCRnVREPUFJv2wrv9iAoFDvADJc0ypmdOxdUtcLeBgBJ6zE0PMeTKnxeQzyk0xTBq4Ep7zw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.28.3", - "@babel/types": "^7.28.2", - "@jridgewell/gen-mapping": "^0.3.12", - "@jridgewell/trace-mapping": "^0.3.28", - "jsesc": "^3.0.2" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-compilation-targets": { - "version": "7.27.2", - "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.27.2.tgz", - "integrity": "sha512-2+1thGUUWWjLTYTHZWK1n8Yga0ijBz1XAhUXcKy81rd5g6yh7hGqMp45v7cadSbEHc9G3OTv45SyneRN3ps4DQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/compat-data": "^7.27.2", - "@babel/helper-validator-option": "^7.27.1", - "browserslist": "^4.24.0", - "lru-cache": "^5.1.1", - "semver": "^6.3.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-compilation-targets/node_modules/lru-cache": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", - "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", - "dev": true, - "license": "ISC", - "dependencies": { - "yallist": "^3.0.2" - } - }, - "node_modules/@babel/helper-compilation-targets/node_modules/semver": { - "version": "6.3.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", - "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "dev": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/@babel/helper-globals": { - "version": "7.28.0", - "resolved": "https://registry.npmjs.org/@babel/helper-globals/-/helper-globals-7.28.0.tgz", - "integrity": "sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-module-imports": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.27.1.tgz", - "integrity": "sha512-0gSFWUPNXNopqtIPQvlD5WgXYI5GY2kP2cCvoT8kczjbfcfuIljTbcWrulD1CIPIX2gt1wghbDy08yE1p+/r3w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/traverse": "^7.27.1", - "@babel/types": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-module-transforms": { - 
"version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.28.3.tgz", - "integrity": "sha512-gytXUbs8k2sXS9PnQptz5o0QnpLL51SwASIORY6XaBKF88nsOT0Zw9szLqlSGQDP/4TljBAD5y98p2U1fqkdsw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-module-imports": "^7.27.1", - "@babel/helper-validator-identifier": "^7.27.1", - "@babel/traverse": "^7.28.3" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0" - } - }, - "node_modules/@babel/helper-string-parser": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz", - "integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-identifier": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.27.1.tgz", - "integrity": "sha512-D2hP9eA+Sqx1kBZgzxZh0y1trbuU+JoDkiEwqhQ36nodYqJwyEIhPSdMNd7lOm/4io72luTPWH20Yda0xOuUow==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-option": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.27.1.tgz", - "integrity": "sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helpers": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.28.3.tgz", - "integrity": "sha512-PTNtvUQihsAsDHMOP5pfobP8C6CM4JWXmP8DrEIt46c3r2bf87Ua1zoqevsMo9g+tWDwgWrFP5EIxuBx5RudAw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/template": "^7.27.2", - "@babel/types": "^7.28.2" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/parser": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.28.3.tgz", - "integrity": "sha512-7+Ey1mAgYqFAx2h0RuoxcQT5+MlG3GTV0TQrgr7/ZliKsm/MNDxVVutlWaziMq7wJNAz8MTqz55XLpWvva6StA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.28.2" - }, - "bin": { - "parser": "bin/babel-parser.js" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@babel/runtime": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.3.tgz", - "integrity": "sha512-9uIQ10o0WGdpP6GDhXcdOJPJuDgFtIDtN/9+ArJQ2NAfAmiuhTQdzkaTGR33v43GYS2UrSA0eX2pPPHoFVvpxA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/template": { - "version": "7.27.2", - "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.27.2.tgz", - "integrity": "sha512-LPDZ85aEJyYSd18/DkjNh4/y1ntkE5KwUHWTiqgRxruuZL2F1yuHligVHLvcHY2vMHXttKFpJn6LwfI7cw7ODw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.27.1", - "@babel/parser": "^7.27.2", - "@babel/types": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/traverse": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.28.3.tgz", - "integrity": 
"sha512-7w4kZYHneL3A6NP2nxzHvT3HCZ7puDZZjFMqDpBPECub79sTtSO5CGXDkKrTQq8ksAwfD/XI2MRFX23njdDaIQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.27.1", - "@babel/generator": "^7.28.3", - "@babel/helper-globals": "^7.28.0", - "@babel/parser": "^7.28.3", - "@babel/template": "^7.27.2", - "@babel/types": "^7.28.2", - "debug": "^4.3.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/types": { - "version": "7.28.2", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.28.2.tgz", - "integrity": "sha512-ruv7Ae4J5dUYULmeXw1gmb7rYRz57OWCPM57pHojnLq/3Z1CK2lNSLTCVjxVk1F/TZHwOZZrOWi0ur95BbLxNQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-string-parser": "^7.27.1", - "@babel/helper-validator-identifier": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@esbuild/aix-ppc64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.9.tgz", - "integrity": "sha512-OaGtL73Jck6pBKjNIe24BnFE6agGl+6KxDtTfHhy1HmhthfKouEcOhqpSL64K4/0WCtbKFLOdzD/44cJ4k9opA==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "aix" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.9.tgz", - "integrity": "sha512-5WNI1DaMtxQ7t7B6xa572XMXpHAaI/9Hnhk8lcxF4zVN4xstUgTlvuGDorBguKEnZO70qwEcLpfifMLoxiPqHQ==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.9.tgz", - "integrity": "sha512-IDrddSmpSv51ftWslJMvl3Q2ZT98fUSL2/rlUXuVqRXHCs5EUF1/f+jbjF5+NG9UffUDMCiTyh8iec7u8RlTLg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.9.tgz", - "integrity": "sha512-I853iMZ1hWZdNllhVZKm34f4wErd4lMyeV7BLzEExGEIZYsOzqDWDf+y082izYUE8gtJnYHdeDpN/6tUdwvfiw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.9.tgz", - "integrity": "sha512-XIpIDMAjOELi/9PB30vEbVMs3GV1v2zkkPnuyRRURbhqjyzIINwj+nbQATh4H9GxUgH1kFsEyQMxwiLFKUS6Rg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.9.tgz", - "integrity": "sha512-jhHfBzjYTA1IQu8VyrjCX4ApJDnH+ez+IYVEoJHeqJm9VhG9Dh2BYaJritkYK3vMaXrf7Ogr/0MQ8/MeIefsPQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/freebsd-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.9.tgz", - "integrity": 
"sha512-z93DmbnY6fX9+KdD4Ue/H6sYs+bhFQJNCPZsi4XWJoYblUqT06MQUdBCpcSfuiN72AbqeBFu5LVQTjfXDE2A6Q==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/freebsd-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.9.tgz", - "integrity": "sha512-mrKX6H/vOyo5v71YfXWJxLVxgy1kyt1MQaD8wZJgJfG4gq4DpQGpgTB74e5yBeQdyMTbgxp0YtNj7NuHN0PoZg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-arm": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.9.tgz", - "integrity": "sha512-HBU2Xv78SMgaydBmdor38lg8YDnFKSARg1Q6AT0/y2ezUAKiZvc211RDFHlEZRFNRVhcMamiToo7bDx3VEOYQw==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.9.tgz", - "integrity": "sha512-BlB7bIcLT3G26urh5Dmse7fiLmLXnRlopw4s8DalgZ8ef79Jj4aUcYbk90g8iCa2467HX8SAIidbL7gsqXHdRw==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ia32": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.9.tgz", - "integrity": "sha512-e7S3MOJPZGp2QW6AK6+Ly81rC7oOSerQ+P8L0ta4FhVi+/j/v2yZzx5CqqDaWjtPFfYz21Vi1S0auHrap3Ma3A==", - "cpu": [ - "ia32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-loong64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.9.tgz", - "integrity": "sha512-Sbe10Bnn0oUAB2AalYztvGcK+o6YFFA/9829PhOCUS9vkJElXGdphz0A3DbMdP8gmKkqPmPcMJmJOrI3VYB1JQ==", - "cpu": [ - "loong64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-mips64el": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.9.tgz", - "integrity": "sha512-YcM5br0mVyZw2jcQeLIkhWtKPeVfAerES5PvOzaDxVtIyZ2NUBZKNLjC5z3/fUlDgT6w89VsxP2qzNipOaaDyA==", - "cpu": [ - "mips64el" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ppc64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.9.tgz", - "integrity": "sha512-++0HQvasdo20JytyDpFvQtNrEsAgNG2CY1CLMwGXfFTKGBGQT3bOeLSYE2l1fYdvML5KUuwn9Z8L1EWe2tzs1w==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-riscv64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.9.tgz", - "integrity": "sha512-uNIBa279Y3fkjV+2cUjx36xkx7eSjb8IvnL01eXUKXez/CBHNRw5ekCGMPM0BcmqBxBcdgUWuUXmVWwm4CH9kg==", - "cpu": [ - "riscv64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - 
"engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-s390x": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.9.tgz", - "integrity": "sha512-Mfiphvp3MjC/lctb+7D287Xw1DGzqJPb/J2aHHcHxflUo+8tmN/6d4k6I2yFR7BVo5/g7x2Monq4+Yew0EHRIA==", - "cpu": [ - "s390x" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.9.tgz", - "integrity": "sha512-iSwByxzRe48YVkmpbgoxVzn76BXjlYFXC7NvLYq+b+kDjyyk30J0JY47DIn8z1MO3K0oSl9fZoRmZPQI4Hklzg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.9.tgz", - "integrity": "sha512-9jNJl6FqaUG+COdQMjSCGW4QiMHH88xWbvZ+kRVblZsWrkXlABuGdFJ1E9L7HK+T0Yqd4akKNa/lO0+jDxQD4Q==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.9.tgz", - "integrity": "sha512-RLLdkflmqRG8KanPGOU7Rpg829ZHu8nFy5Pqdi9U01VYtG9Y0zOG6Vr2z4/S+/3zIyOxiK6cCeYNWOFR9QP87g==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openbsd-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.9.tgz", - "integrity": "sha512-YaFBlPGeDasft5IIM+CQAhJAqS3St3nJzDEgsgFixcfZeyGPCd6eJBWzke5piZuZ7CtL656eOSYKk4Ls2C0FRQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openbsd-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.9.tgz", - "integrity": "sha512-1MkgTCuvMGWuqVtAvkpkXFmtL8XhWy+j4jaSO2wxfJtilVCi0ZE37b8uOdMItIHz4I6z1bWWtEX4CJwcKYLcuA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openharmony-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.25.9.tgz", - "integrity": "sha512-4Xd0xNiMVXKh6Fa7HEJQbrpP3m3DDn43jKxMjxLLRjWnRsfxjORYJlXPO4JNcXtOyfajXorRKY9NkOpTHptErg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openharmony" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/sunos-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.9.tgz", - "integrity": "sha512-WjH4s6hzo00nNezhp3wFIAfmGZ8U7KtrJNlFMRKxiI9mxEK1scOMAaa9i4crUtu+tBr+0IN6JCuAcSBJZfnphw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "sunos" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.9.tgz", - "integrity": 
"sha512-mGFrVJHmZiRqmP8xFOc6b84/7xa5y5YvR1x8djzXpJBSv/UsNK6aqec+6JDjConTgvvQefdGhFDAs2DLAds6gQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-ia32": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.9.tgz", - "integrity": "sha512-b33gLVU2k11nVx1OhX3C8QQP6UHQK4ZtN56oFWvVXvz2VkDoe6fbG8TOgHFxEvqeqohmRnIHe5A1+HADk4OQww==", - "cpu": [ - "ia32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.9.tgz", - "integrity": "sha512-PPOl1mi6lpLNQxnGoyAfschAodRFYXJ+9fs6WHXz7CSWKbOqiMZsubC+BQsVKuul+3vKLuwTHsS2c2y9EoKwxQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@eslint-community/eslint-utils": { - "version": "4.7.0", - "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.7.0.tgz", - "integrity": "sha512-dyybb3AcajC7uha6CvhdVRJqaKyn7w2YKqKyAN37NKYgZT36w+iRb0Dymmc5qEJ549c/S31cMMSFd75bteCpCw==", - "dev": true, - "license": "MIT", - "dependencies": { - "eslint-visitor-keys": "^3.4.3" - }, - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - }, - "peerDependencies": { - "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0" - } - }, - "node_modules/@eslint/js": { - "resolved": "../../node_modules/.pnpm/@eslint+js@9.31.0/node_modules/@eslint/js", - "link": true - }, - "node_modules/@floating-ui/core": { - "version": "1.7.3", - "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.3.tgz", - "integrity": "sha512-sGnvb5dmrJaKEZ+LDIpguvdX3bDlEllmv4/ClQ9awcmCZrlx5jQyyMWFM5kBI+EyNOCDDiKk8il0zeuX3Zlg/w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@floating-ui/utils": "^0.2.10" - } - }, - "node_modules/@floating-ui/dom": { - "version": "1.7.4", - "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.4.tgz", - "integrity": "sha512-OOchDgh4F2CchOX94cRVqhvy7b3AFb+/rQXyswmzmGakRfkMgoWVjfnLWkRirfLEfuD4ysVW16eXzwt3jHIzKA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@floating-ui/core": "^1.7.3", - "@floating-ui/utils": "^0.2.10" - } - }, - "node_modules/@floating-ui/react-dom": { - "version": "2.1.6", - "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.6.tgz", - "integrity": "sha512-4JX6rEatQEvlmgU80wZyq9RT96HZJa88q8hp0pBd+LrczeDI4o6uA2M+uvxngVHo4Ihr8uibXxH6+70zhAFrVw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@floating-ui/dom": "^1.7.4" - }, - "peerDependencies": { - "react": ">=16.8.0", - "react-dom": ">=16.8.0" - } - }, - "node_modules/@floating-ui/utils": { - "version": "0.2.10", - "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.10.tgz", - "integrity": "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@isaacs/cliui": { - "version": "8.0.2", - "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", - "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", - "dev": true, - "license": "ISC", - "dependencies": 
{ - "string-width": "^5.1.2", - "string-width-cjs": "npm:string-width@^4.2.0", - "strip-ansi": "^7.0.1", - "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", - "wrap-ansi": "^8.1.0", - "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@joshwooding/vite-plugin-react-docgen-typescript": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/@joshwooding/vite-plugin-react-docgen-typescript/-/vite-plugin-react-docgen-typescript-0.6.1.tgz", - "integrity": "sha512-J4BaTocTOYFkMHIra1JDWrMWpNmBl4EkplIwHEsV8aeUOtdWjwSnln9U7twjMFTAEB7mptNtSKyVi1Y2W9sDJw==", - "dev": true, - "license": "MIT", - "dependencies": { - "glob": "^10.0.0", - "magic-string": "^0.30.0", - "react-docgen-typescript": "^2.2.2" - }, - "peerDependencies": { - "typescript": ">= 4.3.x", - "vite": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/@jridgewell/gen-mapping": { - "version": "0.3.13", - "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", - "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0", - "@jridgewell/trace-mapping": "^0.3.24" - } - }, - "node_modules/@jridgewell/resolve-uri": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", - "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.5", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", - "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", - "dev": true, - "license": "MIT" - }, - "node_modules/@jridgewell/trace-mapping": { - "version": "0.3.30", - "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.30.tgz", - "integrity": "sha512-GQ7Nw5G2lTu/BtHTKfXhKHok2WGetd4XYcVKGx00SjAk8GMwgJM3zr6zORiPGuOE+/vkc90KtTosSSvaCjKb2Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/resolve-uri": "^3.1.0", - "@jridgewell/sourcemap-codec": "^1.4.14" - } - }, - "node_modules/@mdx-js/react": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/@mdx-js/react/-/react-3.1.0.tgz", - "integrity": "sha512-QjHtSaoameoalGnKDT3FoIl4+9RwyTmo9ZJGBdLOks/YOiWHoRDI3PUwEzOE7kEmGcV3AFcp9K6dYu9rEuKLAQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/mdx": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - }, - "peerDependencies": { - "@types/react": ">=16", - "react": ">=16" - } - }, - "node_modules/@nodelib/fs.scandir": { - "version": "2.1.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", - "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "2.0.5", - "run-parallel": "^1.1.9" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.stat": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", - "integrity": 
"sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.walk": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", - "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.scandir": "2.1.5", - "fastq": "^1.6.0" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@pkgjs/parseargs": { - "version": "0.11.0", - "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", - "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", - "dev": true, - "license": "MIT", - "optional": true, - "engines": { - "node": ">=14" - } - }, - "node_modules/@polka/url": { - "version": "1.0.0-next.29", - "resolved": "https://registry.npmjs.org/@polka/url/-/url-1.0.0-next.29.tgz", - "integrity": "sha512-wwQAWhWSuHaag8c4q/KN/vCoeOJYshAIvMQwD4GpSb3OiZklFfvAgmj0VCBBImRpuF/aFgIRzllXlVX93Jevww==", - "dev": true, - "license": "MIT" - }, - "node_modules/@radix-ui/primitive": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", - "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", - "dev": true, - "license": "MIT" - }, - "node_modules/@radix-ui/react-arrow": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", - "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-collection": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz", - "integrity": "sha512-Fh9rGN0MoI4ZFUNyfFVNU4y9LUz93u9/0K+yLgA2bwRojxM8JU1DyvvMBabnZPBgMWREAJvU2jjVzq+LrFUglw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-compose-refs": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.2.tgz", - "integrity": "sha512-z4eqJvfiNnFMHIIvXP3CY57y2WJs5g2v3X0zm9mEJkrkNv4rDxu+sg9Jh8EkXyeqBkB7SOcboo9dMVqhyrACIg==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - 
"optional": true - } - } - }, - "node_modules/@radix-ui/react-context": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz", - "integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-dialog": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.1.15.tgz", - "integrity": "sha512-TCglVRtzlffRNxRMEyR36DGBLJpeusFcgMVD9PZEzAKnUs1lKCgX5u9BmC2Yg+LL9MgZDugFFs1Vl+Jp4t/PGw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-direction": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.1.tgz", - "integrity": "sha512-1UEWRX6jnOA2y4H5WczZ44gOOjTEmlqv1uNW4GAJEO5+bauCBhv8snY65Iw5/VOS/ghKN9gr2KjnLKxrsvoMVw==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-dismissable-layer": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", - "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-escape-keydown": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-dropdown-menu": { - "version": "2.1.16", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.1.16.tgz", - "integrity": "sha512-1PLGQEynI/3OX/ftV54COn+3Sud/Mn8vALg2rWnBLnRaGtJDduNW/22XjlGgPdpcIbiQxjKtb7BkcjP00nqfJw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - 
"@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-menu": "2.1.16", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-controllable-state": "1.2.2" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-focus-guards": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz", - "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-focus-scope": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", - "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-id": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz", - "integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-menu": { - "version": "2.1.16", - "resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz", - "integrity": "sha512-72F2T+PLlphrqLcAotYPp0uJMr5SjP5SL01wfEspJbru5Zs5vQaSHb4VB3ZMJPimgHHCHG7gMOeOB9H3Hdmtxg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-roving-focus": "1.1.11", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - 
"@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-popover": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", - "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-popper": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", - "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@floating-ui/react-dom": "^2.0.0", - "@radix-ui/react-arrow": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-layout-effect": "1.1.1", - "@radix-ui/react-use-rect": "1.1.1", - "@radix-ui/react-use-size": "1.1.1", - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-portal": { - "version": "1.1.9", - "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", - "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-presence": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", - "integrity": 
"sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-primitive": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", - "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-slot": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-roving-focus": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.11.tgz", - "integrity": "sha512-7A6S9jSgm/S+7MdtNDSb+IU859vQqJ/QAtcYQcfFC6W8RS4IxIZDldLR0xqCFZ6DCyrQLjLPsxtTNch5jVA4lA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-controllable-state": "1.2.2" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-tooltip": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz", - "integrity": "sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - 
"@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-visually-hidden": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-callback-ref": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz", - "integrity": "sha512-FkBMwD+qbGQeMu1cOHnuGB6x4yzPjho8ap5WtbEJ26umhgqVXbhekKUQO+hZEL1vU92a3wHwdp0HAcqAUF5iDg==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-controllable-state": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.2.2.tgz", - "integrity": "sha512-BjasUjixPFdS+NKkypcyyN5Pmg83Olst0+c6vGov0diwTEo6mgdqVR6hxcEgFuh4QrAs7Rc+9KuGJ9TVCj0Zzg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-effect-event": "0.0.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-effect-event": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-effect-event/-/react-use-effect-event-0.0.2.tgz", - "integrity": "sha512-Qp8WbZOBe+blgpuUT+lw2xheLP8q0oatc9UpmiemEICxGvFLYmHm9QowVZGHtJlGbS6A6yJ3iViad/2cVjnOiA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-escape-keydown": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.1.1.tgz", - "integrity": "sha512-Il0+boE7w/XebUHyBjroE+DbByORGR9KKmITzbR7MyQ4akpORYP/ZmbhAr0DG7RmmBqoOnZdy2QlvajJ2QA59g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-layout-effect": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz", - "integrity": "sha512-RbJRS4UWQFkzHTTwVymMTUv8EqYhOp8dOOviLj2ugtTiXRaRQS7GLGxZTLL1jWhMeoSCf5zmcZkqTl9IiYfXcQ==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-rect": { - "version": "1.1.1", - "resolved": 
"https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.1.1.tgz", - "integrity": "sha512-QTYuDesS0VtuHNNvMh+CjlKJ4LJickCMUAqjlE3+j8w+RlRpwyX3apEQKGFzbZGdo7XNG1tXa+bQqIE7HIXT2w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-size": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.1.1.tgz", - "integrity": "sha512-ewrXRDTAqAXlkl6t/fkXWNAhFX9I+CkKlw6zjEwk86RSPKwZr3xpBRso655aqYafwtnbpHLj6toFzmd6xdVptQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-visually-hidden": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz", - "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/rect": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz", - "integrity": "sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@rollup/pluginutils": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-5.2.0.tgz", - "integrity": "sha512-qWJ2ZTbmumwiLFomfzTyt5Kng4hwPi9rwCYN4SHb6eaRU1KNO4ccxINHr/VhH4GgPlt1XfSTLX2LBTme8ne4Zw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "estree-walker": "^2.0.2", - "picomatch": "^4.0.2" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" - }, - "peerDependenciesMeta": { - "rollup": { - "optional": true - } - } - }, - "node_modules/@rollup/pluginutils/node_modules/estree-walker": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", - "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", - "dev": true, - "license": "MIT" - }, - "node_modules/@rollup/pluginutils/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/@sindresorhus/merge-streams": { - "version": "2.3.0", - "resolved": 
"https://registry.npmjs.org/@sindresorhus/merge-streams/-/merge-streams-2.3.0.tgz", - "integrity": "sha512-LtoMMhxAlorcGhmFYI+LhPgbPZCkgP6ra1YL604EeF6U98pLlQ3iWIGMdWSC+vWmPBWBNgmDBAhnAobLROJmwg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/@standard-schema/spec": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.0.0.tgz", - "integrity": "sha512-m2bOd0f2RT9k8QJx1JN85cZYyH1RqFBdlwtkSlf4tBDYLCiiZnv1fIIwacK6cqwXavOydf0NPToMQgpKq+dVlA==", - "dev": true, - "license": "MIT" - }, - "node_modules/@storybook/addon-docs": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/addon-docs/-/addon-docs-9.1.5.tgz", - "integrity": "sha512-q1j5RRElxFSnHOh60eS3dS2TAyAHzcQeH/2B9UXo6MUHu7HmhNpw3qt2YibIw0zEogHCvZhLNx6TNzSy+7wRUw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@mdx-js/react": "^3.0.0", - "@storybook/csf-plugin": "9.1.5", - "@storybook/icons": "^1.4.0", - "@storybook/react-dom-shim": "9.1.5", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", - "ts-dedent": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "storybook": "^9.1.5" - } - }, - "node_modules/@storybook/addon-onboarding": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/addon-onboarding/-/addon-onboarding-9.1.5.tgz", - "integrity": "sha512-UJpkWLbugcSGzSUzivTTNdO0Y8gpAn//qJzn2TobwkPJgSwQEoHcjUfWjgZ3mSpQrSQO2e1O1yC3SJTBQt/fqQ==", - "dev": true, - "license": "MIT", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "storybook": "^9.1.5" - } - }, - "node_modules/@storybook/builder-vite": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/builder-vite/-/builder-vite-9.1.5.tgz", - "integrity": "sha512-sgt/9+Yl/5O7Bj5hdbHfadN8e/e4CNiDZKDcbLOMpOjKKoqF8vm19I1QocWIAiKjTOhF+4E9v9LddjtAGnfqHQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@storybook/csf-plugin": "9.1.5", - "ts-dedent": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "storybook": "^9.1.5", - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0" - } - }, - "node_modules/@storybook/csf-plugin": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/csf-plugin/-/csf-plugin-9.1.5.tgz", - "integrity": "sha512-PmHuF+j11Z7BxAI2/4wQYn0gH1d67gNvycyR+EWgp4P/AWam9wFbuI/T1R45CRQTV2/VrfGdts/tFrvo5kXWig==", - "dev": true, - "license": "MIT", - "dependencies": { - "unplugin": "^1.3.1" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "storybook": "^9.1.5" - } - }, - "node_modules/@storybook/global": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/@storybook/global/-/global-5.0.0.tgz", - "integrity": "sha512-FcOqPAXACP0I3oJ/ws6/rrPT9WGhu915Cg8D02a9YxLo0DE9zI+a9A5gRGvmQ09fiWPukqI8ZAEoQEdWUKMQdQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@storybook/icons": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/@storybook/icons/-/icons-1.4.0.tgz", - "integrity": "sha512-Td73IeJxOyalzvjQL+JXx72jlIYHgs+REaHiREOqfpo3A2AYYG71AUbcv+lg7mEDIweKVCxsMQ0UKo634c8XeA==", - "dev": true, - "license": 
"MIT", - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta" - } - }, - "node_modules/@storybook/react": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/react/-/react-9.1.5.tgz", - "integrity": "sha512-fBVP7Go09gzpImtaMcZ2DipLEWdWeTmz7BrACr3Z8uCyKcoH8/d1Wv0JgIiBo1UKDh5ZgYx5pLafaPNqmVAepg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@storybook/global": "^5.0.0", - "@storybook/react-dom-shim": "9.1.5" - }, - "engines": { - "node": ">=20.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "storybook": "^9.1.5", - "typescript": ">= 4.9.x" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/@storybook/react-dom-shim": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/react-dom-shim/-/react-dom-shim-9.1.5.tgz", - "integrity": "sha512-blSq9uzSYnfgEYPHYKgM5O14n8hbXNiXx2GiVJyDSg8QPNicbsBg+lCb1TC7/USfV26pNZr/lGNNKGkcCEN6Gw==", - "dev": true, - "license": "MIT", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "storybook": "^9.1.5" - } - }, - "node_modules/@storybook/react-vite": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/react-vite/-/react-vite-9.1.5.tgz", - "integrity": "sha512-OYbkHHNCrn8MNPd+4KxMjcSR4M/YHa84h8sWDUHhKRTRtZFmj8i/QDW3E8tGx2BRLxXw3dTYe9J5UYBhJDDxFA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@joshwooding/vite-plugin-react-docgen-typescript": "0.6.1", - "@rollup/pluginutils": "^5.0.2", - "@storybook/builder-vite": "9.1.5", - "@storybook/react": "9.1.5", - "find-up": "^7.0.0", - "magic-string": "^0.30.0", - "react-docgen": "^8.0.0", - "resolve": "^1.22.8", - "tsconfig-paths": "^4.2.0" - }, - "engines": { - "node": ">=20.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "storybook": "^9.1.5", - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0" - } - }, - "node_modules/@tailwindcss/typography": { - "version": "0.5.16", - "resolved": "https://registry.npmjs.org/@tailwindcss/typography/-/typography-0.5.16.tgz", - "integrity": "sha512-0wDLwCVF5V3x3b1SGXPCDcdsbDHMBe+lkFzBRaHeLvNi+nrrnZ1lA18u+OTWO8iSWU2GxUOCvlXtDuqftc1oiA==", - "dev": true, - "license": "MIT", - "dependencies": { - "lodash.castarray": "^4.4.0", - "lodash.isplainobject": "^4.0.6", - "lodash.merge": "^4.6.2", - "postcss-selector-parser": "6.0.10" - }, - "peerDependencies": { - "tailwindcss": ">=3.0.0 || insiders || >=4.0.0-alpha.20 || >=4.0.0-beta.1" - } - }, - "node_modules/@tanstack/react-virtual": { - "version": "3.13.12", - "resolved": "https://registry.npmjs.org/@tanstack/react-virtual/-/react-virtual-3.13.12.tgz", - "integrity": "sha512-Gd13QdxPSukP8ZrkbgS2RwoZseTTbQPLnQEn7HY/rqtM+8Zt95f7xKC7N0EsKs7aoz0WzZ+fditZux+F8EzYxA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@tanstack/virtual-core": "3.13.12" - }, - "funding": { - 
"type": "github", - "url": "https://github.com/sponsors/tannerlinsley" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" - } - }, - "node_modules/@tanstack/virtual-core": { - "version": "3.13.12", - "resolved": "https://registry.npmjs.org/@tanstack/virtual-core/-/virtual-core-3.13.12.tgz", - "integrity": "sha512-1YBOJfRHV4sXUmWsFSf5rQor4Ss82G8dQWLRbnk3GA4jeP8hQt1hxXh0tmflpC0dz3VgEv/1+qwPyLeWkQuPFA==", - "dev": true, - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/tannerlinsley" - } - }, - "node_modules/@testing-library/dom": { - "version": "10.4.1", - "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz", - "integrity": "sha512-o4PXJQidqJl82ckFaXUeoAW+XysPLauYI43Abki5hABd853iMhitooc6znOnczgbTYmEP6U6/y1ZyKAIsvMKGg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.10.4", - "@babel/runtime": "^7.12.5", - "@types/aria-query": "^5.0.1", - "aria-query": "5.3.0", - "dom-accessibility-api": "^0.5.9", - "lz-string": "^1.5.0", - "picocolors": "1.1.1", - "pretty-format": "^27.0.2" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/@testing-library/jest-dom": { - "version": "6.7.0", - "resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.7.0.tgz", - "integrity": "sha512-RI2e97YZ7MRa+vxP4UUnMuMFL2buSsf0ollxUbTgrbPLKhMn8KVTx7raS6DYjC7v1NDVrioOvaShxsguLNISCA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@adobe/css-tools": "^4.4.0", - "aria-query": "^5.0.0", - "css.escape": "^1.5.1", - "dom-accessibility-api": "^0.6.3", - "picocolors": "^1.1.1", - "redent": "^3.0.0" - }, - "engines": { - "node": ">=14", - "npm": ">=6", - "yarn": ">=1" - } - }, - "node_modules/@testing-library/jest-dom/node_modules/dom-accessibility-api": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.6.3.tgz", - "integrity": "sha512-7ZgogeTnjuHbo+ct10G9Ffp0mif17idi0IyWNVA/wcwcm7NPOD/WEHVP3n7n3MhXqxoIYm8d6MuZohYWIZ4T3w==", - "dev": true, - "license": "MIT" - }, - "node_modules/@testing-library/react": { - "version": "16.3.0", - "resolved": "https://registry.npmjs.org/@testing-library/react/-/react-16.3.0.tgz", - "integrity": "sha512-kFSyxiEDwv1WLl2fgsq6pPBbw5aWKrsY2/noi1Id0TK0UParSF62oFQFGHXIyaG4pp2tEub/Zlel+fjjZILDsw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/runtime": "^7.12.5" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@testing-library/dom": "^10.0.0", - "@types/react": "^18.0.0 || ^19.0.0", - "@types/react-dom": "^18.0.0 || ^19.0.0", - "react": "^18.0.0 || ^19.0.0", - "react-dom": "^18.0.0 || ^19.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@testing-library/user-event": { - "version": "14.6.1", - "resolved": "https://registry.npmjs.org/@testing-library/user-event/-/user-event-14.6.1.tgz", - "integrity": "sha512-vq7fv0rnt+QTXgPxr5Hjc210p6YKq2kmdziLgnsZGgLJ9e6VAShx1pACLuRjd/AS/sr7phAR58OIIpf0LlmQNw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12", - "npm": ">=6" - }, - "peerDependencies": { - "@testing-library/dom": ">=7.21.4" - } - }, - "node_modules/@types/aria-query": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/@types/aria-query/-/aria-query-5.0.4.tgz", - "integrity": 
"sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/babel__core": { - "version": "7.20.5", - "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", - "integrity": "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.20.7", - "@babel/types": "^7.20.7", - "@types/babel__generator": "*", - "@types/babel__template": "*", - "@types/babel__traverse": "*" - } - }, - "node_modules/@types/babel__generator": { - "version": "7.27.0", - "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.27.0.tgz", - "integrity": "sha512-ufFd2Xi92OAVPYsy+P4n7/U7e68fex0+Ee8gSG9KX7eo084CWiQ4sdxktvdl0bOPupXtVJPY19zk6EwWqUQ8lg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.0.0" - } - }, - "node_modules/@types/babel__template": { - "version": "7.4.4", - "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.4.tgz", - "integrity": "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.1.0", - "@babel/types": "^7.0.0" - } - }, - "node_modules/@types/babel__traverse": { - "version": "7.28.0", - "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.28.0.tgz", - "integrity": "sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.28.2" - } - }, - "node_modules/@types/chai": { - "version": "5.2.2", - "resolved": "https://registry.npmjs.org/@types/chai/-/chai-5.2.2.tgz", - "integrity": "sha512-8kB30R7Hwqf40JPiKhVzodJs2Qc1ZJ5zuT3uzw5Hq/dhNCl3G3l83jfpdI1e20BP348+fV7VIL/+FxaXkqBmWg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/deep-eql": "*" - } - }, - "node_modules/@types/d3-color": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@types/d3-color/-/d3-color-3.1.3.tgz", - "integrity": "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/d3-drag": { - "version": "3.0.7", - "resolved": "https://registry.npmjs.org/@types/d3-drag/-/d3-drag-3.0.7.tgz", - "integrity": "sha512-HE3jVKlzU9AaMazNufooRJ5ZpWmLIoc90A37WU2JMmeq28w1FQqCZswHZ3xR+SuxYftzHq6WU6KJHvqxKzTxxQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/d3-selection": "*" - } - }, - "node_modules/@types/d3-interpolate": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@types/d3-interpolate/-/d3-interpolate-3.0.4.tgz", - "integrity": "sha512-mgLPETlrpVV1YRJIglr4Ez47g7Yxjl1lj7YKsiMCb27VJH9W8NVM6Bb9d8kkpG/uAQS5AmbA48q2IAolKKo1MA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/d3-color": "*" - } - }, - "node_modules/@types/d3-selection": { - "version": "3.0.11", - "resolved": "https://registry.npmjs.org/@types/d3-selection/-/d3-selection-3.0.11.tgz", - "integrity": "sha512-bhAXu23DJWsrI45xafYpkQ4NtcKMwWnAC/vKrd2l+nxMFuvOT3XMYTIj2opv8vq8AO5Yh7Qac/nSeP/3zjTK0w==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/d3-transition": { - "version": "3.0.9", - "resolved": "https://registry.npmjs.org/@types/d3-transition/-/d3-transition-3.0.9.tgz", - 
"integrity": "sha512-uZS5shfxzO3rGlu0cC3bjmMFKsXv+SmZZcgp0KD22ts4uGXp5EVYGzu/0YdwZeKmddhcAccYtREJKkPfXkZuCg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/d3-selection": "*" - } - }, - "node_modules/@types/d3-zoom": { - "version": "3.0.8", - "resolved": "https://registry.npmjs.org/@types/d3-zoom/-/d3-zoom-3.0.8.tgz", - "integrity": "sha512-iqMC4/YlFCSlO8+2Ii1GGGliCAY4XdeG748w5vQUbevlbDu0zSjH/+jojorQVBK/se0j6DUFNPBGSqD3YWYnDw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/d3-interpolate": "*", - "@types/d3-selection": "*" - } - }, - "node_modules/@types/deep-eql": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/@types/deep-eql/-/deep-eql-4.0.2.tgz", - "integrity": "sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/doctrine": { - "version": "0.0.9", - "resolved": "https://registry.npmjs.org/@types/doctrine/-/doctrine-0.0.9.tgz", - "integrity": "sha512-eOIHzCUSH7SMfonMG1LsC2f8vxBFtho6NGBznK41R84YzPuvSBzrhEps33IsQiOW9+VL6NQ9DbjQJznk/S4uRA==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/estree": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", - "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/mdx": { - "version": "2.0.13", - "resolved": "https://registry.npmjs.org/@types/mdx/-/mdx-2.0.13.tgz", - "integrity": "sha512-+OWZQfAYyio6YkJb3HLxDrvnx6SWWDbC0zVPfBRzUk0/nqoDyf6dNxQi3eArPe8rJ473nobTMQ/8Zk+LxJ+Yuw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/node": { - "version": "20.19.13", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.13.tgz", - "integrity": "sha512-yCAeZl7a0DxgNVteXFHt9+uyFbqXGy/ShC4BlcHkoE0AfGXYv/BUiplV72DjMYXHDBXFjhvr6DD1NiRVfB4j8g==", - "dev": true, - "license": "MIT", - "dependencies": { - "undici-types": "~6.21.0" - } - }, - "node_modules/@types/prismjs": { - "version": "1.26.5", - "resolved": "https://registry.npmjs.org/@types/prismjs/-/prismjs-1.26.5.tgz", - "integrity": "sha512-AUZTa7hQ2KY5L7AmtSiqxlhWxb4ina0yd8hNbl4TWuqnv/pFP0nDMb3YrfSBf4hJVGLh2YEIBfKaBW/9UEl6IQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/prop-types": { - "version": "15.7.15", - "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.15.tgz", - "integrity": "sha512-F6bEyamV9jKGAFBEmlQnesRPGOQqS2+Uwi0Em15xenOxHaf2hv6L8YCVn3rPdPJOiJfPiCnLIRyvwVaqMY3MIw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/react": { - "version": "18.3.23", - "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.23.tgz", - "integrity": "sha512-/LDXMQh55EzZQ0uVAZmKKhfENivEvWz6E+EYzh+/MCjMhNsotd+ZHhBGIjFDTi6+fz0OhQQQLbTgdQIxxCsC0w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/prop-types": "*", - "csstype": "^3.0.2" - } - }, - "node_modules/@types/react-dom": { - "version": "18.3.7", - "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.7.tgz", - "integrity": "sha512-MEe3UeoENYVFXzoXEWsvcpg6ZvlrFNlOQ7EOsvhI3CfAXwzPfO8Qwuxd40nepsYKqyyVQnTdEfv68q91yLcKrQ==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "^18.0.0" - } - }, - "node_modules/@types/resolve": { - "version": "1.20.6", - "resolved": "https://registry.npmjs.org/@types/resolve/-/resolve-1.20.6.tgz", - "integrity": 
"sha512-A4STmOXPhMUtHH+S6ymgE2GiBSMqf4oTvcQZMcHzokuTLVYzXTB8ttjcgxOVaAp2lGwEdzZ0J+cRbbeevQj1UQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@typescript-eslint/project-service": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.39.1.tgz", - "integrity": "sha512-8fZxek3ONTwBu9ptw5nCKqZOSkXshZB7uAxuFF0J/wTMkKydjXCzqqga7MlFMpHi9DoG4BadhmTkITBcg8Aybw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/tsconfig-utils": "^8.39.1", - "@typescript-eslint/types": "^8.39.1", - "debug": "^4.3.4" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/scope-manager": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.39.1.tgz", - "integrity": "sha512-RkBKGBrjgskFGWuyUGz/EtD8AF/GW49S21J8dvMzpJitOF1slLEbbHnNEtAHtnDAnx8qDEdRrULRnWVx27wGBw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/types": "8.39.1", - "@typescript-eslint/visitor-keys": "8.39.1" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@typescript-eslint/tsconfig-utils": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.39.1.tgz", - "integrity": "sha512-ePUPGVtTMR8XMU2Hee8kD0Pu4NDE1CN9Q1sxGSGd/mbOtGZDM7pnhXNJnzW63zk/q+Z54zVzj44HtwXln5CvHA==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/types": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.39.1.tgz", - "integrity": "sha512-7sPDKQQp+S11laqTrhHqeAbsCfMkwJMrV7oTDvtDds4mEofJYir414bYKUEb8YPUm9QL3U+8f6L6YExSoAGdQw==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@typescript-eslint/typescript-estree": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.39.1.tgz", - "integrity": "sha512-EKkpcPuIux48dddVDXyQBlKdeTPMmALqBUbEk38McWv0qVEZwOpVJBi7ugK5qVNgeuYjGNQxrrnoM/5+TI/BPw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/project-service": "8.39.1", - "@typescript-eslint/tsconfig-utils": "8.39.1", - "@typescript-eslint/types": "8.39.1", - "@typescript-eslint/visitor-keys": "8.39.1", - "debug": "^4.3.4", - "fast-glob": "^3.3.2", - "is-glob": "^4.0.3", - "minimatch": "^9.0.4", - "semver": "^7.6.0", - "ts-api-utils": "^2.1.0" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/utils": { - "version": "8.39.1", - "resolved": 
"https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.39.1.tgz", - "integrity": "sha512-VF5tZ2XnUSTuiqZFXCZfZs1cgkdd3O/sSYmdo2EpSyDlC86UM/8YytTmKnehOW3TGAlivqTDT6bS87B/GQ/jyg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@eslint-community/eslint-utils": "^4.7.0", - "@typescript-eslint/scope-manager": "8.39.1", - "@typescript-eslint/types": "8.39.1", - "@typescript-eslint/typescript-estree": "8.39.1" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/visitor-keys": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.39.1.tgz", - "integrity": "sha512-W8FQi6kEh2e8zVhQ0eeRnxdvIoOkAp/CPAahcNio6nO9dsIwb9b34z90KOlheoyuVf6LSOEdjlkxSkapNEc+4A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/types": "8.39.1", - "eslint-visitor-keys": "^4.2.1" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@typescript-eslint/visitor-keys/node_modules/eslint-visitor-keys": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.1.tgz", - "integrity": "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - } - }, - "node_modules/@vitejs/plugin-react": { - "resolved": "../../node_modules/.pnpm/@vitejs+plugin-react@4.7.0_vite@6.3.5_@types+node@24.1.0_jiti@2.4.2_lightningcss@1.30.1_terse_p5zuafkpgv2vlm3nhxz3zj4hsu/node_modules/@vitejs/plugin-react", - "link": true - }, - "node_modules/@vitest/browser": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/browser/-/browser-3.2.4.tgz", - "integrity": "sha512-tJxiPrWmzH8a+w9nLKlQMzAKX/7VjFs50MWgcAj7p9XQ7AQ9/35fByFYptgPELyLw+0aixTnC4pUWV+APcZ/kw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@testing-library/dom": "^10.4.0", - "@testing-library/user-event": "^14.6.1", - "@vitest/mocker": "3.2.4", - "@vitest/utils": "3.2.4", - "magic-string": "^0.30.17", - "sirv": "^3.0.1", - "tinyrainbow": "^2.0.0", - "ws": "^8.18.2" - }, - "funding": { - "url": "https://opencollective.com/vitest" - }, - "peerDependencies": { - "playwright": "*", - "vitest": "3.2.4", - "webdriverio": "^7.0.0 || ^8.0.0 || ^9.0.0" - }, - "peerDependenciesMeta": { - "playwright": { - "optional": true - }, - "safaridriver": { - "optional": true - }, - "webdriverio": { - "optional": true - } - } - }, - "node_modules/@vitest/expect": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-3.2.4.tgz", - "integrity": "sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/chai": "^5.2.2", - "@vitest/spy": "3.2.4", - "@vitest/utils": "3.2.4", - "chai": "^5.2.0", - "tinyrainbow": "^2.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/mocker": { - "version": "3.2.4", - "resolved": 
"https://registry.npmjs.org/@vitest/mocker/-/mocker-3.2.4.tgz", - "integrity": "sha512-46ryTE9RZO/rfDd7pEqFl7etuyzekzEhUbTW3BvmeO/BcCMEgq59BKhek3dXDWgAj4oMK6OZi+vRr1wPW6qjEQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/spy": "3.2.4", - "estree-walker": "^3.0.3", - "magic-string": "^0.30.17" - }, - "funding": { - "url": "https://opencollective.com/vitest" - }, - "peerDependencies": { - "msw": "^2.4.9", - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0" - }, - "peerDependenciesMeta": { - "msw": { - "optional": true - }, - "vite": { - "optional": true - } - } - }, - "node_modules/@vitest/pretty-format": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-3.2.4.tgz", - "integrity": "sha512-IVNZik8IVRJRTr9fxlitMKeJeXFFFN0JaB9PHPGQ8NKQbGpfjlTx9zO4RefN8gp7eqjNy8nyK3NZmBzOPeIxtA==", - "dev": true, - "license": "MIT", - "dependencies": { - "tinyrainbow": "^2.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/runner": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-3.2.4.tgz", - "integrity": "sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/utils": "3.2.4", - "pathe": "^2.0.3", - "strip-literal": "^3.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/snapshot": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-3.2.4.tgz", - "integrity": "sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/pretty-format": "3.2.4", - "magic-string": "^0.30.17", - "pathe": "^2.0.3" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/spy": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-3.2.4.tgz", - "integrity": "sha512-vAfasCOe6AIK70iP5UD11Ac4siNUNJ9i/9PZ3NKx07sG6sUxeag1LWdNrMWeKKYBLlzuK+Gn65Yd5nyL6ds+nw==", - "dev": true, - "license": "MIT", - "dependencies": { - "tinyspy": "^4.0.3" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/utils": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-3.2.4.tgz", - "integrity": "sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/pretty-format": "3.2.4", - "loupe": "^3.1.4", - "tinyrainbow": "^2.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@xyflow/react": { - "version": "12.8.4", - "resolved": "https://registry.npmjs.org/@xyflow/react/-/react-12.8.4.tgz", - "integrity": "sha512-bqUu4T5QSHiCFPkoH+b+LROKwQJdLvcjhGbNW9c1dLafCBRjmH1IYz0zPE+lRDXCtQ9kRyFxz3tG19+8VORJ1w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@xyflow/system": "0.0.68", - "classcat": "^5.0.3", - "zustand": "^4.4.0" - }, - "peerDependencies": { - "react": ">=17", - "react-dom": ">=17" - } - }, - "node_modules/@xyflow/system": { - "version": "0.0.68", - "resolved": "https://registry.npmjs.org/@xyflow/system/-/system-0.0.68.tgz", - "integrity": "sha512-QDG2wxIG4qX+uF8yzm1ULVZrcXX3MxPBoxv7O52FWsX87qIImOqifUhfa/TwsvLdzn7ic2DDBH1uI8TKbdNTYA==", - "dev": true, - "license": "MIT", - "dependencies": { - 
"@types/d3-drag": "^3.0.7", - "@types/d3-interpolate": "^3.0.4", - "@types/d3-selection": "^3.0.10", - "@types/d3-transition": "^3.0.8", - "@types/d3-zoom": "^3.0.8", - "d3-drag": "^3.0.0", - "d3-interpolate": "^3.0.1", - "d3-selection": "^3.0.0", - "d3-zoom": "^3.0.0" - } - }, - "node_modules/acorn": { - "version": "8.15.0", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", - "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", - "dev": true, - "license": "MIT", - "bin": { - "acorn": "bin/acorn" - }, - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/ansi-colors": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.3.tgz", - "integrity": "sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/any-promise": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", - "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", - "dev": true, - "license": "MIT" - }, - "node_modules/anymatch": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", - "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", - "dev": true, - "license": "ISC", - "dependencies": { - "normalize-path": "^3.0.0", - "picomatch": "^2.0.4" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/arg": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz", - "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==", - "dev": true, - "license": "MIT" - }, - "node_modules/argparse": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true, - "license": "Python-2.0" - }, - "node_modules/aria-hidden": { - "version": "1.2.6", - "resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.6.tgz", - "integrity": "sha512-ik3ZgC9dY/lYVVM++OISsaYDeg1tb0VtP5uL3ouh1koGOaUMDPpbFIei4JkFimWUFPn90sbMNMXQAIVOlnYKJA==", - "dev": true, - "license": "MIT", - "dependencies": { - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/aria-query": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz", - "integrity": "sha512-b0P0sZPKtyu8HkeRAfCq0IfURZK+SuwMjY1UXGBU27wpAiTwQAIlq56IbIO+ytk/JjS1fMR14ee5WBBfKi5J6A==", - "dev": true, - 
"license": "Apache-2.0", - "dependencies": { - "dequal": "^2.0.3" - } - }, - "node_modules/assertion-error": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz", - "integrity": "sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - } - }, - "node_modules/ast-types": { - "version": "0.16.1", - "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.16.1.tgz", - "integrity": "sha512-6t10qk83GOG8p0vKmaCr8eiilZwO171AvbROMtvvNiwrTly62t+7XkA8RdIIVbpMhCASAsxgAzdRSwh6nw/5Dg==", - "dev": true, - "license": "MIT", - "dependencies": { - "tslib": "^2.0.1" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/autoprefixer": { - "version": "10.4.21", - "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.21.tgz", - "integrity": "sha512-O+A6LWV5LDHSJD3LjHYoNi4VLsj/Whi7k6zG12xTYaU4cQ8oxQGckXNX8cRHK5yOZ/ppVHe0ZBXGzSV9jXdVbQ==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/autoprefixer" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "browserslist": "^4.24.4", - "caniuse-lite": "^1.0.30001702", - "fraction.js": "^4.3.7", - "normalize-range": "^0.1.2", - "picocolors": "^1.1.1", - "postcss-value-parser": "^4.2.0" - }, - "bin": { - "autoprefixer": "bin/autoprefixer" - }, - "engines": { - "node": "^10 || ^12 || >=14" - }, - "peerDependencies": { - "postcss": "^8.1.0" - } - }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true, - "license": "MIT" - }, - "node_modules/better-opn": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/better-opn/-/better-opn-3.0.2.tgz", - "integrity": "sha512-aVNobHnJqLiUelTaHat9DZ1qM2w0C0Eym4LPI/3JxOnSokGVdsl1T1kN7TFvsEAD8G47A6VKQ0TVHqbBnYMJlQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "open": "^8.0.4" - }, - "engines": { - "node": ">=12.0.0" - } - }, - "node_modules/binary-extensions": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", - "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/brace-expansion": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", - "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/braces": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", - "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", - "dev": true, - "license": "MIT", - "dependencies": { - "fill-range": "^7.1.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/browserslist": { - 
"version": "4.25.2", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.25.2.tgz", - "integrity": "sha512-0si2SJK3ooGzIawRu61ZdPCO1IncZwS8IzuX73sPZsXW6EQ/w/DAfPyKI8l1ETTCr2MnvqWitmlCUxgdul45jA==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/browserslist" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "caniuse-lite": "^1.0.30001733", - "electron-to-chromium": "^1.5.199", - "node-releases": "^2.0.19", - "update-browserslist-db": "^1.1.3" - }, - "bin": { - "browserslist": "cli.js" - }, - "engines": { - "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" - } - }, - "node_modules/cac": { - "version": "6.7.14", - "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", - "integrity": "sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/callsites": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", - "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/camelcase-css": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/camelcase-css/-/camelcase-css-2.0.1.tgz", - "integrity": "sha512-QOSvevhslijgYwRx6Rv7zKdMF8lbRmx+uQGx2+vDc+KI/eBnsy9kit5aj23AgGu3pa4t9AgwbnXWqS+iOY+2aA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/caniuse-lite": { - "version": "1.0.30001735", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001735.tgz", - "integrity": "sha512-EV/laoX7Wq2J9TQlyIXRxTJqIw4sxfXS4OYgudGxBYRuTv0q7AM6yMEpU/Vo1I94thg9U6EZ2NfZx9GJq83u7w==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/caniuse-lite" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "CC-BY-4.0" - }, - "node_modules/chai": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/chai/-/chai-5.2.1.tgz", - "integrity": "sha512-5nFxhUrX0PqtyogoYOA8IPswy5sZFTOsBFl/9bNsmDLgsxYTzSZQJDPppDnZPTQbzSEm0hqGjWPzRemQCYbD6A==", - "dev": true, - "license": "MIT", - "dependencies": { - "assertion-error": "^2.0.1", - "check-error": "^2.1.1", - "deep-eql": "^5.0.1", - "loupe": "^3.1.0", - "pathval": "^2.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/chalk": { - "version": "5.6.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz", - "integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^12.17.0 || ^14.13 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/chalk-template": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/chalk-template/-/chalk-template-1.1.2.tgz", - "integrity": "sha512-2bxTP2yUH7AJj/VAXfcA+4IcWGdQ87HwBANLt5XxGTeomo8yG0y95N1um9i5StvhT/Bl0/2cARA5v1PpPXUxUA==", - "dev": true, - "license": "MIT", - 
"dependencies": { - "chalk": "^5.2.0" - }, - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/chalk/chalk-template?sponsor=1" - } - }, - "node_modules/check-error": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/check-error/-/check-error-2.1.1.tgz", - "integrity": "sha512-OAlb+T7V4Op9OwdkjmguYRqncdlx5JiofwOAUkmTF+jNdHwzTaTs4sRAGpzLF3oOz5xAyDGrPgeIDFQmDOTiJw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 16" - } - }, - "node_modules/chokidar": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", - "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", - "dev": true, - "license": "MIT", - "dependencies": { - "anymatch": "~3.1.2", - "braces": "~3.0.2", - "glob-parent": "~5.1.2", - "is-binary-path": "~2.1.0", - "is-glob": "~4.0.1", - "normalize-path": "~3.0.0", - "readdirp": "~3.6.0" - }, - "engines": { - "node": ">= 8.10.0" - }, - "funding": { - "url": "https://paulmillr.com/funding/" - }, - "optionalDependencies": { - "fsevents": "~2.3.2" - } - }, - "node_modules/chokidar/node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/class-variance-authority": { - "version": "0.7.1", - "resolved": "https://registry.npmjs.org/class-variance-authority/-/class-variance-authority-0.7.1.tgz", - "integrity": "sha512-Ka+9Trutv7G8M6WT6SeiRWz792K5qEqIGEGzXKhAE6xOWAY6pPH8U+9IY3oCMv6kqTmLsv7Xh/2w2RigkePMsg==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "clsx": "^2.1.1" - }, - "funding": { - "url": "https://polar.sh/cva" - } - }, - "node_modules/classcat": { - "version": "5.0.5", - "resolved": "https://registry.npmjs.org/classcat/-/classcat-5.0.5.tgz", - "integrity": "sha512-JhZUT7JFcQy/EzW605k/ktHtncoo9vnyW/2GspNYwFlN1C/WmjuV/xtS04e9SOkL2sTdw0VAZ2UGCcQ9lR6p6w==", - "dev": true, - "license": "MIT" - }, - "node_modules/cli-cursor": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-5.0.0.tgz", - "integrity": "sha512-aCj4O5wKyszjMmDT4tZj93kxyydN/K5zPWSCe6/0AV/AA1pqe5ZBIw0a2ZfPQV7lL5/yb5HsUreJ6UFAF1tEQw==", - "dev": true, - "license": "MIT", - "dependencies": { - "restore-cursor": "^5.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/cli-spinners": { - "version": "2.9.2", - "resolved": "https://registry.npmjs.org/cli-spinners/-/cli-spinners-2.9.2.tgz", - "integrity": "sha512-ywqV+5MmyL4E7ybXgKys4DugZbX0FC6LnwrhjuykIjnK9k8OQacQ7axGKnjDXWNhns0xot3bZI5h55H8yo9cJg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/clsx": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", - "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/cmdk": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/cmdk/-/cmdk-1.1.1.tgz", - "integrity": 
"sha512-Vsv7kFaXm+ptHDMZ7izaRsP70GgrW9NBNGswt9OZaVBLlE0SNpDq8eu/VGXyF9r7M0azK3Wy7OlYXsuyYLFzHg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "^1.1.1", - "@radix-ui/react-dialog": "^1.1.6", - "@radix-ui/react-id": "^1.1.0", - "@radix-ui/react-primitive": "^2.0.2" - }, - "peerDependencies": { - "react": "^18 || ^19 || ^19.0.0-rc", - "react-dom": "^18 || ^19 || ^19.0.0-rc" - } - }, - "node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true, - "license": "MIT" - }, - "node_modules/commander": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", - "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/convert-source-map": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", - "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", - "dev": true, - "license": "MIT" - }, - "node_modules/cosmiconfig": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.0.tgz", - "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", - "dev": true, - "license": "MIT", - "dependencies": { - "env-paths": "^2.2.1", - "import-fresh": "^3.3.0", - "js-yaml": "^4.1.0", - "parse-json": "^5.2.0" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/d-fischer" - }, - "peerDependencies": { - "typescript": ">=4.9.5" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/cross-spawn": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", - "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/css.escape": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/css.escape/-/css.escape-1.5.1.tgz", - "integrity": "sha512-YUifsXXuknHlUsmlgyY0PKzgPOr7/FjCePfHNt0jxm83wHZi44VDMQ7/fGNkjY3/jV1MC+1CmZbaHzugyeRtpg==", - "dev": true, - "license": "MIT" - }, - "node_modules/cssesc": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz", - "integrity": "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==", - "dev": true, - "license": "MIT", - "bin": { - "cssesc": "bin/cssesc" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/csstype": { - "version": "3.1.3", - "resolved": 
"https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", - "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", - "dev": true, - "license": "MIT" - }, - "node_modules/d3-color": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz", - "integrity": "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-dispatch": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-dispatch/-/d3-dispatch-3.0.1.tgz", - "integrity": "sha512-rzUyPU/S7rwUflMyLc1ETDeBj0NRuHKKAcvukozwhshr6g6c5d8zh4c2gQjY2bZ0dXeGLWc1PF174P2tVvKhfg==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-drag": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/d3-drag/-/d3-drag-3.0.0.tgz", - "integrity": "sha512-pWbUJLdETVA8lQNJecMxoXfH6x+mO2UQo8rSmZ+QqxcbyA3hfeprFgIT//HW2nlHChWeIIMwS2Fq+gEARkhTkg==", - "dev": true, - "license": "ISC", - "dependencies": { - "d3-dispatch": "1 - 3", - "d3-selection": "3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-ease": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-ease/-/d3-ease-3.0.1.tgz", - "integrity": "sha512-wR/XK3D3XcLIZwpbvQwQ5fK+8Ykds1ip7A2Txe0yxncXSdq1L9skcG7blcedkOX+ZcgxGAmLX1FrRGbADwzi0w==", - "dev": true, - "license": "BSD-3-Clause", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-interpolate": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz", - "integrity": "sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==", - "dev": true, - "license": "ISC", - "dependencies": { - "d3-color": "1 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-selection": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", - "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-timer": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz", - "integrity": "sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-transition": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-transition/-/d3-transition-3.0.1.tgz", - "integrity": "sha512-ApKvfjsSR6tg06xrL434C0WydLr7JewBB3V+/39RMHsaXTOG0zmt/OAXeng5M5LBm0ojmxJrpomQVZ1aPvBL4w==", - "dev": true, - "license": "ISC", - "dependencies": { - "d3-color": "1 - 3", - "d3-dispatch": "1 - 3", - "d3-ease": "1 - 3", - "d3-interpolate": "1 - 3", - "d3-timer": "1 - 3" - }, - "engines": { - "node": ">=12" - }, - "peerDependencies": { - "d3-selection": "2 - 3" - } - }, - "node_modules/d3-zoom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/d3-zoom/-/d3-zoom-3.0.0.tgz", - "integrity": "sha512-b8AmV3kfQaqWAuacbPuNbL6vahnOJflOhexLzMMNLga62+/nh0JzvJ0aO/5a5MVgUFGS7Hu1P9P03o3fJkDCyw==", - "dev": true, - "license": "ISC", - "dependencies": { - "d3-dispatch": "1 - 3", - "d3-drag": "2 - 3", - "d3-interpolate": "1 - 3", - "d3-selection": "2 - 3", - "d3-transition": "2 - 3" - }, - "engines": { - 
"node": ">=12" - } - }, - "node_modules/debug": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", - "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/deep-eql": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz", - "integrity": "sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/define-lazy-prop": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz", - "integrity": "sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/dequal": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", - "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/detect-node-es": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/detect-node-es/-/detect-node-es-1.1.0.tgz", - "integrity": "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/didyoumean": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz", - "integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==", - "dev": true, - "license": "Apache-2.0" - }, - "node_modules/dlv": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz", - "integrity": "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA==", - "dev": true, - "license": "MIT" - }, - "node_modules/doctrine": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", - "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "esutils": "^2.0.2" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/dom-accessibility-api": { - "version": "0.5.16", - "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.5.16.tgz", - "integrity": "sha512-X7BJ2yElsnOJ30pZF4uIIDfBEVgF4XEBxL9Bxhy6dnrm5hkzqmsWHGTiHqRiITNhMyFLyAiWndIJP7Z1NTteDg==", - "dev": true, - "license": "MIT" - }, - "node_modules/eastasianwidth": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", - "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", - "dev": true, - "license": "MIT" - }, - "node_modules/effect": { - "version": "3.17.13", - "resolved": "https://registry.npmjs.org/effect/-/effect-3.17.13.tgz", - "integrity": "sha512-JMz5oBxs/6mu4FP9Csjub4jYMUwMLrp+IzUmSDVIzn2NoeoyOXMl7x1lghfr3dLKWffWrdnv/d8nFFdgrHXPqw==", - "dev": true, - "license": "MIT", - "dependencies": { - 
"@standard-schema/spec": "^1.0.0", - "fast-check": "^3.23.1" - } - }, - "node_modules/electron-to-chromium": { - "version": "1.5.201", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.201.tgz", - "integrity": "sha512-ZG65vsrLClodGqywuigc+7m0gr4ISoTQttfVh7nfpLv0M7SIwF4WbFNEOywcqTiujs12AUeeXbFyQieDICAIxg==", - "dev": true, - "license": "ISC" - }, - "node_modules/emoji-regex": { - "version": "9.2.2", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", - "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "dev": true, - "license": "MIT" - }, - "node_modules/enquirer": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.4.1.tgz", - "integrity": "sha512-rRqJg/6gd538VHvR3PSrdRBb/1Vy2YfzHqzvbhGIQpDRKIa4FgV/54b5Q1xYSxOOwKvjXweS26E0Q+nAMwp2pQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-colors": "^4.1.1", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8.6" - } - }, - "node_modules/enquirer/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/env-paths": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", - "integrity": "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/error-ex": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", - "integrity": "sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-arrayish": "^0.2.1" - } - }, - "node_modules/es-module-lexer": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz", - "integrity": "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==", - "dev": true, - "license": "MIT" - }, - "node_modules/esbuild": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.9.tgz", - "integrity": "sha512-CRbODhYyQx3qp7ZEwzxOk4JBqmD/seJrzPa/cGjY1VtIn5E09Oi9/dB4JwctnfZ8Q8iT7rioVv5k/FNT/uf54g==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "bin": { - "esbuild": "bin/esbuild" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "@esbuild/aix-ppc64": "0.25.9", - "@esbuild/android-arm": "0.25.9", - "@esbuild/android-arm64": "0.25.9", - "@esbuild/android-x64": "0.25.9", - "@esbuild/darwin-arm64": "0.25.9", - "@esbuild/darwin-x64": "0.25.9", - "@esbuild/freebsd-arm64": "0.25.9", - "@esbuild/freebsd-x64": "0.25.9", - "@esbuild/linux-arm": "0.25.9", - "@esbuild/linux-arm64": "0.25.9", - "@esbuild/linux-ia32": "0.25.9", - "@esbuild/linux-loong64": "0.25.9", - "@esbuild/linux-mips64el": "0.25.9", - "@esbuild/linux-ppc64": "0.25.9", - "@esbuild/linux-riscv64": "0.25.9", - "@esbuild/linux-s390x": "0.25.9", - "@esbuild/linux-x64": "0.25.9", - "@esbuild/netbsd-arm64": "0.25.9", - "@esbuild/netbsd-x64": "0.25.9", - "@esbuild/openbsd-arm64": "0.25.9", - 
"@esbuild/openbsd-x64": "0.25.9", - "@esbuild/openharmony-arm64": "0.25.9", - "@esbuild/sunos-x64": "0.25.9", - "@esbuild/win32-arm64": "0.25.9", - "@esbuild/win32-ia32": "0.25.9", - "@esbuild/win32-x64": "0.25.9" - } - }, - "node_modules/esbuild-register": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/esbuild-register/-/esbuild-register-3.6.0.tgz", - "integrity": "sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==", - "dev": true, - "license": "MIT", - "dependencies": { - "debug": "^4.3.4" - }, - "peerDependencies": { - "esbuild": ">=0.12 <1" - } - }, - "node_modules/escalade": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", - "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/eslint": { - "resolved": "../../node_modules/.pnpm/eslint@9.31.0_jiti@2.4.2/node_modules/eslint", - "link": true - }, - "node_modules/eslint-plugin-react-hooks": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-5.2.0.tgz", - "integrity": "sha512-+f15FfK64YQwZdJNELETdn5ibXEUQmW1DZL6KXhNnc2heoy/sg9VJJeT7n8TlMWouzWqSWavFkIhHyIbIAEapg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "eslint": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0-0 || ^9.0.0" - } - }, - "node_modules/eslint-plugin-storybook": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/eslint-plugin-storybook/-/eslint-plugin-storybook-9.1.5.tgz", - "integrity": "sha512-vCfaZ2Wk1N1vvK4vmNZoA6y2CYxJwbgIs6BE8/toPf4Z6hCAipoobP6a/30Rs0g/B2TSxTSj41TfrJKJrowpjQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/utils": "^8.8.1" - }, - "engines": { - "node": ">=20.0.0" - }, - "peerDependencies": { - "eslint": ">=8", - "storybook": "^9.1.5" - } - }, - "node_modules/eslint-visitor-keys": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz", - "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - } - }, - "node_modules/esprima": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", - "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true, - "license": "BSD-2-Clause", - "bin": { - "esparse": "bin/esparse.js", - "esvalidate": "bin/esvalidate.js" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/estree-walker": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", - "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0" - } - }, - "node_modules/esutils": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", - "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "dev": true, - "license": "BSD-2-Clause", - "engines": { - "node": 
">=0.10.0" - } - }, - "node_modules/expect-type": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.2.2.tgz", - "integrity": "sha512-JhFGDVJ7tmDJItKhYgJCGLOWjuK9vPxiXoUFLwLDc99NlmklilbiQJwoctZtt13+xMw91MCk/REan6MWHqDjyA==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">=12.0.0" - } - }, - "node_modules/fast-check": { - "version": "3.23.2", - "resolved": "https://registry.npmjs.org/fast-check/-/fast-check-3.23.2.tgz", - "integrity": "sha512-h5+1OzzfCC3Ef7VbtKdcv7zsstUQwUDlYpUTvjeUsJAssPgLn7QzbboPtL5ro04Mq0rPOsMzl7q5hIbRs2wD1A==", - "dev": true, - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/dubzzz" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/fast-check" - } - ], - "license": "MIT", - "dependencies": { - "pure-rand": "^6.1.0" - }, - "engines": { - "node": ">=8.0.0" - } - }, - "node_modules/fast-glob": { - "version": "3.3.3", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", - "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "^2.0.2", - "@nodelib/fs.walk": "^1.2.3", - "glob-parent": "^5.1.2", - "merge2": "^1.3.0", - "micromatch": "^4.0.8" - }, - "engines": { - "node": ">=8.6.0" - } - }, - "node_modules/fast-glob/node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/fastq": { - "version": "1.19.1", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.1.tgz", - "integrity": "sha512-GwLTyxkCXjXbxqIhTsMI2Nui8huMPtnxg7krajPJAjnEG/iiOS7i+zCtWGZR9G0NBKbXKh6X9m9UIsYX/N6vvQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "reusify": "^1.0.4" - } - }, - "node_modules/fill-range": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", - "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", - "dev": true, - "license": "MIT", - "dependencies": { - "to-regex-range": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/find-up": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-7.0.0.tgz", - "integrity": "sha512-YyZM99iHrqLKjmt4LJDj58KI+fYyufRLBSYcqycxf//KpBk9FoewoGX0450m9nB44qrZnovzC2oeP5hUibxc/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "locate-path": "^7.2.0", - "path-exists": "^5.0.0", - "unicorn-magic": "^0.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/foreground-child": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", - "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", - "dev": true, - "license": "ISC", - "dependencies": { - "cross-spawn": "^7.0.6", - "signal-exit": "^4.0.1" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/fraction.js": { - "version": "4.3.7", 
- "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz", - "integrity": "sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==", - "dev": true, - "license": "MIT", - "engines": { - "node": "*" - }, - "funding": { - "type": "patreon", - "url": "https://github.com/sponsors/rawify" - } - }, - "node_modules/fs-extra": { - "version": "11.3.1", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.3.1.tgz", - "integrity": "sha512-eXvGGwZ5CL17ZSwHWd3bbgk7UUpF6IFHtP57NYYakPvHOs8GDgDe5KJI36jIJzDkJ6eJjuzRA8eBQb6SkKue0g==", - "dev": true, - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" - }, - "engines": { - "node": ">=14.14" - } - }, - "node_modules/fsevents": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", - "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, - "node_modules/function-bind": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", - "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "dev": true, - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/fuse.js": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.1.0.tgz", - "integrity": "sha512-trLf4SzuuUxfusZADLINj+dE8clK1frKdmqiJNb1Es75fmI5oY6X2mxLVUciLLjxqw/xr72Dhy+lER6dGd02FQ==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">=10" - } - }, - "node_modules/gensync": { - "version": "1.0.0-beta.2", - "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", - "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/get-east-asian-width": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.3.1.tgz", - "integrity": "sha512-R1QfovbPsKmosqTnPoRFiJ7CF9MLRgb53ChvMZm+r4p76/+8yKDy17qLL2PKInORy2RkZZekuK0efYgmzTkXyQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/get-nonce": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/get-nonce/-/get-nonce-1.0.1.tgz", - "integrity": "sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/glob": { - "version": "10.4.5", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", - "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", - "dev": true, - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": 
"https://github.com/sponsors/isaacs" - } - }, - "node_modules/glob-parent": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", - "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", - "dev": true, - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.3" - }, - "engines": { - "node": ">=10.13.0" - } - }, - "node_modules/globals": { - "version": "16.3.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-16.3.0.tgz", - "integrity": "sha512-bqWEnJ1Nt3neqx2q5SFfGS8r/ahumIakg3HcwtNlrVlwXIeNumWn/c7Pn/wKzGhf6SaW6H6uWXLqC30STCMchQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/globby": { - "version": "14.1.0", - "resolved": "https://registry.npmjs.org/globby/-/globby-14.1.0.tgz", - "integrity": "sha512-0Ia46fDOaT7k4og1PDW4YbodWWr3scS2vAr2lTbsplOt2WkKp0vQbkI9wKis/T5LV/dqPjO3bpS/z6GTJB82LA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@sindresorhus/merge-streams": "^2.1.0", - "fast-glob": "^3.3.3", - "ignore": "^7.0.3", - "path-type": "^6.0.0", - "slash": "^5.1.0", - "unicorn-magic": "^0.3.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/globby/node_modules/unicorn-magic": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/unicorn-magic/-/unicorn-magic-0.3.0.tgz", - "integrity": "sha512-+QBBXBCvifc56fsbuxZQ6Sic3wqqc3WWaqxs58gvJrcOuN83HGTCwz3oS5phzU9LthRNE9VrJCFCLUgHeeFnfA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/graceful-fs": { - "version": "4.2.11", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", - "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/hasown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", - "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/hosted-git-info": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-8.1.0.tgz", - "integrity": "sha512-Rw/B2DNQaPBICNXEm8balFz9a6WpZrkCGpcWFpy7nCj+NyhSdqXipmfvtmWt9xGfp0wZnBxB+iVpLmQMYt47Tw==", - "dev": true, - "license": "ISC", - "dependencies": { - "lru-cache": "^10.0.1" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/ignore": { - "version": "7.0.5", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", - "integrity": "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, - "node_modules/import-fresh": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", - "integrity": "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "parent-module": 
"^1.0.0", - "resolve-from": "^4.0.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/indent-string": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz", - "integrity": "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/is-arrayish": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", - "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", - "dev": true, - "license": "MIT" - }, - "node_modules/is-binary-path": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", - "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", - "dev": true, - "license": "MIT", - "dependencies": { - "binary-extensions": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/is-core-module": { - "version": "2.16.1", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.1.tgz", - "integrity": "sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==", - "dev": true, - "license": "MIT", - "dependencies": { - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-docker": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/is-docker/-/is-docker-2.2.1.tgz", - "integrity": "sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==", - "dev": true, - "license": "MIT", - "bin": { - "is-docker": "cli.js" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/is-glob": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", - "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-extglob": "^2.1.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-interactive": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/is-interactive/-/is-interactive-2.0.0.tgz", - "integrity": "sha512-qP1vozQRI+BMOPcjFzrjXuQvdak2pHNUMZoeG2eRbiSqyvbEf/wQtEOTOX1guk6E3t36RkaqiSt8A/6YElNxLQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - 
"node_modules/is-number": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", - "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.12.0" - } - }, - "node_modules/is-unicode-supported": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-2.1.0.tgz", - "integrity": "sha512-mE00Gnza5EEB3Ds0HfMyllZzbBrmLOX3vfWoj9A9PEnTfratQ/BcaJOuMhnkhjXvb2+FkY3VuHqtAGpTPmglFQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-wsl": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-2.2.0.tgz", - "integrity": "sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-docker": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/isexe": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "dev": true, - "license": "ISC" - }, - "node_modules/jackspeak": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", - "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", - "dev": true, - "license": "BlueOak-1.0.0", - "dependencies": { - "@isaacs/cliui": "^8.0.2" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - }, - "optionalDependencies": { - "@pkgjs/parseargs": "^0.11.0" - } - }, - "node_modules/jiti": { - "version": "1.21.7", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.7.tgz", - "integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==", - "dev": true, - "license": "MIT", - "bin": { - "jiti": "bin/jiti.js" - } - }, - "node_modules/js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/js-yaml": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", - "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", - "dev": true, - "license": "MIT", - "dependencies": { - "argparse": "^2.0.1" - }, - "bin": { - "js-yaml": "bin/js-yaml.js" - } - }, - "node_modules/jsesc": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", - "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", - "dev": true, - "license": "MIT", - "bin": { - "jsesc": "bin/jsesc" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/json-parse-even-better-errors": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", - "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", - "dev": true, - "license": "MIT" - }, - "node_modules/json5": { - "version": 
"2.2.3", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", - "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", - "dev": true, - "license": "MIT", - "bin": { - "json5": "lib/cli.js" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/jsonc-parser": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz", - "integrity": "sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/jsonfile": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.0.tgz", - "integrity": "sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==", - "dev": true, - "license": "MIT", - "dependencies": { - "universalify": "^2.0.0" - }, - "optionalDependencies": { - "graceful-fs": "^4.1.6" - } - }, - "node_modules/kleur": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", - "integrity": "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/lilconfig": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz", - "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/antonk52" - } - }, - "node_modules/lines-and-columns": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", - "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", - "dev": true, - "license": "MIT" - }, - "node_modules/locate-path": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-7.2.0.tgz", - "integrity": "sha512-gvVijfZvn7R+2qyPX8mAuKcFGDf6Nc61GdvGafQsHL0sBIxfKzA+usWn4GFC/bk+QdwPUD4kWFJLhElipq+0VA==", - "dev": true, - "license": "MIT", - "dependencies": { - "p-locate": "^6.0.0" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/lodash.castarray": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/lodash.castarray/-/lodash.castarray-4.4.0.tgz", - "integrity": "sha512-aVx8ztPv7/2ULbArGJ2Y42bG1mEQ5mGjpdvrbJcJFU3TbYybe+QlLS4pst9zV52ymy2in1KpFPiZnAOATxD4+Q==", - "dev": true, - "license": "MIT" - }, - "node_modules/lodash.isplainobject": { - "version": "4.0.6", - "resolved": "https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz", - "integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==", - "dev": true, - "license": "MIT" - }, - "node_modules/lodash.merge": { - "version": "4.6.2", - "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", - "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/log-symbols": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-6.0.0.tgz", - "integrity": 
"sha512-i24m8rpwhmPIS4zscNzK6MSEhk0DUWa/8iYQWxhffV8jkI4Phvs3F+quL5xvS0gdQR0FyTCMMH33Y78dDTzzIw==", - "dev": true, - "license": "MIT", - "dependencies": { - "chalk": "^5.3.0", - "is-unicode-supported": "^1.3.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/log-symbols/node_modules/is-unicode-supported": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-1.3.0.tgz", - "integrity": "sha512-43r2mRvz+8JRIKnWJ+3j8JtjRKZ6GmjzfaE/qiBJnikNnYv/6bagRJ1kUhNk8R5EX/GkobD+r+sfxCPJsiKBLQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "js-tokens": "^3.0.0 || ^4.0.0" - }, - "bin": { - "loose-envify": "cli.js" - } - }, - "node_modules/loupe": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.2.0.tgz", - "integrity": "sha512-2NCfZcT5VGVNX9mSZIxLRkEAegDGBpuQZBy13desuHeVORmBDyAET4TkJr4SjqQy3A8JDofMN6LpkK8Xcm/dlw==", - "dev": true, - "license": "MIT" - }, - "node_modules/lru-cache": { - "version": "10.4.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", - "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/lucide-react": { - "version": "0.542.0", - "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.542.0.tgz", - "integrity": "sha512-w3hD8/SQB7+lzU2r4VdFyzzOzKnUjTZIF/MQJGSSvni7Llewni4vuViRppfRAa2guOsY5k4jZyxw/i9DQHv+dw==", - "dev": true, - "license": "ISC", - "peerDependencies": { - "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" - } - }, - "node_modules/lz-string": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz", - "integrity": "sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ==", - "dev": true, - "license": "MIT", - "bin": { - "lz-string": "bin/bin.js" - } - }, - "node_modules/magic-string": { - "version": "0.30.17", - "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.17.tgz", - "integrity": "sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0" - } - }, - "node_modules/merge2": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", - "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/micromatch": { - "version": "4.0.8", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", - "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", - "dev": true, - "license": "MIT", - "dependencies": { - "braces": "^3.0.3", - "picomatch": "^2.3.1" - }, - "engines": { - "node": ">=8.6" - } - }, - "node_modules/mimic-function": { - 
"version": "5.0.1", - "resolved": "https://registry.npmjs.org/mimic-function/-/mimic-function-5.0.1.tgz", - "integrity": "sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/min-indent": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", - "integrity": "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/minimatch": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", - "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "dev": true, - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/minimist": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", - "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", - "dev": true, - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/minipass": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", - "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/mrmime": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/mrmime/-/mrmime-2.0.1.tgz", - "integrity": "sha512-Y3wQdFg2Va6etvQ5I82yUhGdsKrcYox6p7FfL1LbK2J4V01F9TGlepTIhnK24t7koZibmg82KGglhA1XK5IsLQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - } - }, - "node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true, - "license": "MIT" - }, - "node_modules/mz": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", - "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "any-promise": "^1.0.0", - "object-assign": "^4.0.1", - "thenify-all": "^1.0.0" - } - }, - "node_modules/nanoid": { - "version": "3.3.11", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", - "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "bin": { - "nanoid": "bin/nanoid.cjs" - }, - "engines": { - "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" - } - }, - "node_modules/node-releases": { - "version": "2.0.19", - "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz", - "integrity": "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==", - "dev": 
true, - "license": "MIT" - }, - "node_modules/normalize-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", - "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/normalize-range": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/normalize-range/-/normalize-range-0.1.2.tgz", - "integrity": "sha512-bdok/XvKII3nUpklnV6P2hxtMNrCboOjAcyBuQnWEhO665FwrSNRxU+AqpsyvO6LgGYPspN+lu5CLtw4jPRKNA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/npm-package-arg": { - "version": "12.0.2", - "resolved": "https://registry.npmjs.org/npm-package-arg/-/npm-package-arg-12.0.2.tgz", - "integrity": "sha512-f1NpFjNI9O4VbKMOlA5QoBq/vSQPORHcTZ2feJpFkTHJ9eQkdlmZEKSjcAhxTGInC7RlEyScT9ui67NaOsjFWA==", - "dev": true, - "license": "ISC", - "dependencies": { - "hosted-git-info": "^8.0.0", - "proc-log": "^5.0.0", - "semver": "^7.3.5", - "validate-npm-package-name": "^6.0.0" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/object-hash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", - "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/onetime": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/onetime/-/onetime-7.0.0.tgz", - "integrity": "sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "mimic-function": "^5.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/open": { - "version": "8.4.2", - "resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz", - "integrity": "sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "define-lazy-prop": "^2.0.0", - "is-docker": "^2.1.1", - "is-wsl": "^2.2.0" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/ora": { - "version": "8.2.0", - "resolved": "https://registry.npmjs.org/ora/-/ora-8.2.0.tgz", - "integrity": "sha512-weP+BZ8MVNnlCm8c0Qdc1WSWq4Qn7I+9CJGm7Qali6g44e/PUzbjNqJX5NJ9ljlNMosfJvg1fKEGILklK9cwnw==", - "dev": true, - "license": "MIT", - "dependencies": { - "chalk": "^5.3.0", - "cli-cursor": "^5.0.0", - "cli-spinners": "^2.9.2", - "is-interactive": "^2.0.0", - "is-unicode-supported": "^2.0.0", - "log-symbols": "^6.0.0", - "stdin-discarder": "^0.2.2", - "string-width": "^7.2.0", - "strip-ansi": "^7.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/ora/node_modules/emoji-regex": { - "version": "10.5.0", - "resolved": 
"https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.5.0.tgz", - "integrity": "sha512-lb49vf1Xzfx080OKA0o6l8DQQpV+6Vg95zyCJX9VB/BqKYlhG7N4wgROUUHRA+ZPUefLnteQOad7z1kT2bV7bg==", - "dev": true, - "license": "MIT" - }, - "node_modules/ora/node_modules/string-width": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz", - "integrity": "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^10.3.0", - "get-east-asian-width": "^1.0.0", - "strip-ansi": "^7.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/p-limit": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-4.0.0.tgz", - "integrity": "sha512-5b0R4txpzjPWVw/cXXUResoD4hb6U/x9BH08L7nw+GN1sezDzPdxeRvpc9c433fZhBan/wusjbCsqwqm4EIBIQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "yocto-queue": "^1.0.0" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/p-locate": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-6.0.0.tgz", - "integrity": "sha512-wPrq66Llhl7/4AGC6I+cqxT07LhXvWL08LNXz1fENOw0Ap4sRZZ/gZpTTJ5jpurzzzfS2W/Ge9BY3LgLjCShcw==", - "dev": true, - "license": "MIT", - "dependencies": { - "p-limit": "^4.0.0" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/p-map": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/p-map/-/p-map-7.0.3.tgz", - "integrity": "sha512-VkndIv2fIB99swvQoA65bm+fsmt6UNdGeIB0oxBs+WhAhdh08QA04JXpI7rbB9r08/nkbysKoya9rtDERYOYMA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/package-json-from-dist": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", - "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", - "dev": true, - "license": "BlueOak-1.0.0" - }, - "node_modules/parent-module": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", - "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", - "dev": true, - "license": "MIT", - "dependencies": { - "callsites": "^3.0.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/parse-json": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", - "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.0.0", - "error-ex": "^1.3.1", - "json-parse-even-better-errors": "^2.3.0", - "lines-and-columns": "^1.1.6" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/path-exists": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-5.0.0.tgz", - "integrity": 
"sha512-RjhtfwJOxzcFmNOi6ltcbcu4Iu+FL3zEj83dk4kAS+fVpTxXLO1b38RvJgT/0QwvV/L3aY9TAnyv0EOqW4GoMQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - } - }, - "node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/path-parse": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", - "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", - "dev": true, - "license": "MIT" - }, - "node_modules/path-scurry": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", - "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", - "dev": true, - "license": "BlueOak-1.0.0", - "dependencies": { - "lru-cache": "^10.2.0", - "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" - }, - "engines": { - "node": ">=16 || 14 >=14.18" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/path-type": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/path-type/-/path-type-6.0.0.tgz", - "integrity": "sha512-Vj7sf++t5pBD637NSfkxpHSMfWaeig5+DKWLhcqIYx6mWQz5hdJTGDVMQiJcw1ZYkhs7AazKDGpRVji1LJCZUQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/pathe": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", - "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", - "dev": true, - "license": "MIT" - }, - "node_modules/pathval": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/pathval/-/pathval-2.0.1.tgz", - "integrity": "sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 14.16" - } - }, - "node_modules/picocolors": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", - "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", - "dev": true, - "license": "ISC" - }, - "node_modules/picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8.6" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/pify": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", - "integrity": "sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/pirates": { - "version": "4.0.7", - "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.7.tgz", - "integrity": "sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==", - 
"dev": true, - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/playwright": { - "version": "1.55.0", - "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.55.0.tgz", - "integrity": "sha512-sdCWStblvV1YU909Xqx0DhOjPZE4/5lJsIS84IfN9dAZfcl/CIZ5O8l3o0j7hPMjDvqoTF8ZUcc+i/GL5erstA==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "playwright-core": "1.55.0" - }, - "bin": { - "playwright": "cli.js" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "fsevents": "2.3.2" - } - }, - "node_modules/playwright-core": { - "version": "1.55.0", - "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.55.0.tgz", - "integrity": "sha512-GvZs4vU3U5ro2nZpeiwyb0zuFaqb9sUiAJuyrWpcGouD8y9/HLgGbNRjIph7zU9D3hnPaisMl9zG9CgFi/biIg==", - "dev": true, - "license": "Apache-2.0", - "bin": { - "playwright-core": "cli.js" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/playwright/node_modules/fsevents": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", - "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, - "node_modules/postcss": { - "version": "8.5.6", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", - "integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/postcss" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "nanoid": "^3.3.11", - "picocolors": "^1.1.1", - "source-map-js": "^1.2.1" - }, - "engines": { - "node": "^10 || ^12 || >=14" - } - }, - "node_modules/postcss-import": { - "version": "15.1.0", - "resolved": "https://registry.npmjs.org/postcss-import/-/postcss-import-15.1.0.tgz", - "integrity": "sha512-hpr+J05B2FVYUAXHeK1YyI267J/dDDhMU6B6civm8hSY1jYJnBXxzKDKDswzJmtLHryrjhnDjqqp/49t8FALew==", - "dev": true, - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.0.0", - "read-cache": "^1.0.0", - "resolve": "^1.1.7" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "postcss": "^8.0.0" - } - }, - "node_modules/postcss-js": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/postcss-js/-/postcss-js-4.0.1.tgz", - "integrity": "sha512-dDLF8pEO191hJMtlHFPRa8xsizHaM82MLfNkUHdUtVEV3tgTp5oj+8qbEqYM57SLfc74KSbw//4SeJma2LRVIw==", - "dev": true, - "license": "MIT", - "dependencies": { - "camelcase-css": "^2.0.1" - }, - "engines": { - "node": "^12 || ^14 || >= 16" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - "peerDependencies": { - "postcss": "^8.4.21" - } - }, - "node_modules/postcss-load-config": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz", - "integrity": "sha512-bSVhyJGL00wMVoPUzAVAnbEoWyqRxkjv64tUl427SKnPrENtq6hJwUojroMz2VB+Q1edmi4IfrAPpami5VVgMQ==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": 
"https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "lilconfig": "^3.0.0", - "yaml": "^2.3.4" - }, - "engines": { - "node": ">= 14" - }, - "peerDependencies": { - "postcss": ">=8.0.9", - "ts-node": ">=9.0.0" - }, - "peerDependenciesMeta": { - "postcss": { - "optional": true - }, - "ts-node": { - "optional": true - } - } - }, - "node_modules/postcss-nested": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz", - "integrity": "sha512-HQbt28KulC5AJzG+cZtj9kvKB93CFCdLvog1WFLf1D+xmMvPGlBstkpTEZfK5+AN9hfJocyBFCNiqyS48bpgzQ==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "postcss-selector-parser": "^6.1.1" - }, - "engines": { - "node": ">=12.0" - }, - "peerDependencies": { - "postcss": "^8.2.14" - } - }, - "node_modules/postcss-nested/node_modules/postcss-selector-parser": { - "version": "6.1.2", - "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz", - "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", - "dev": true, - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "util-deprecate": "^1.0.2" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/postcss-selector-parser": { - "version": "6.0.10", - "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.0.10.tgz", - "integrity": "sha512-IQ7TZdoaqbT+LCpShg46jnZVlhWD2w6iQYAcYXfHARZ7X1t/UGhhceQDs5X0cGqKvYlHNOuv7Oa1xmb0oQuA3w==", - "dev": true, - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "util-deprecate": "^1.0.2" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/postcss-value-parser": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", - "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/pretty-format": { - "version": "27.5.1", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-27.5.1.tgz", - "integrity": "sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1", - "ansi-styles": "^5.0.0", - "react-is": "^17.0.1" - }, - "engines": { - "node": "^10.13.0 || ^12.13.0 || ^14.15.0 || >=15.0.0" - } - }, - "node_modules/prism-react-renderer": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/prism-react-renderer/-/prism-react-renderer-2.4.1.tgz", - "integrity": "sha512-ey8Ls/+Di31eqzUxC46h8MksNuGx/n0AAC8uKpwFau4RPDYLuE3EXTp8N8G2vX2N7UC/+IXeNUnlWBGGcAG+Ig==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/prismjs": "^1.26.0", - "clsx": "^2.0.0" - }, - "peerDependencies": { - "react": ">=16.0.0" - } - }, - "node_modules/proc-log": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/proc-log/-/proc-log-5.0.0.tgz", - "integrity": "sha512-Azwzvl90HaF0aCz1JrDdXQykFakSSNPaPoiZ9fm5qJIMHioDZEi7OAdRwSm6rSoPtY3Qutnm3L7ogmg3dc+wbQ==", - "dev": true, - "license": "ISC", - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/prompts": { - "version": "2.4.2", - "resolved": 
"https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", - "integrity": "sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "kleur": "^3.0.3", - "sisteransi": "^1.0.5" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/pure-rand": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz", - "integrity": "sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==", - "dev": true, - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/dubzzz" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/fast-check" - } - ], - "license": "MIT" - }, - "node_modules/queue-microtask": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", - "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/react": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", - "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "loose-envify": "^1.1.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/react-docgen": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/react-docgen/-/react-docgen-8.0.0.tgz", - "integrity": "sha512-kmob/FOTwep7DUWf9KjuenKX0vyvChr3oTdvvPt09V60Iz75FJp+T/0ZeHMbAfJj2WaVWqAPP5Hmm3PYzSPPKg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/core": "^7.18.9", - "@babel/traverse": "^7.18.9", - "@babel/types": "^7.18.9", - "@types/babel__core": "^7.18.0", - "@types/babel__traverse": "^7.18.0", - "@types/doctrine": "^0.0.9", - "@types/resolve": "^1.20.2", - "doctrine": "^3.0.0", - "resolve": "^1.22.1", - "strip-indent": "^4.0.0" - }, - "engines": { - "node": "^20.9.0 || >=22" - } - }, - "node_modules/react-docgen-typescript": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/react-docgen-typescript/-/react-docgen-typescript-2.4.0.tgz", - "integrity": "sha512-ZtAp5XTO5HRzQctjPU0ybY0RRCQO19X/8fxn3w7y2VVTUbGHDKULPTL4ky3vB05euSgG5NpALhEhDPvQ56wvXg==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "typescript": ">= 4.3.x" - } - }, - "node_modules/react-docgen/node_modules/strip-indent": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-4.0.0.tgz", - "integrity": "sha512-mnVSV2l+Zv6BLpSD/8V87CW/y9EmmbYzGCIavsnsI6/nwn26DwffM/yztm30Z/I2DY9wdS3vXVCMnHDgZaVNoA==", - "dev": true, - "license": "MIT", - "dependencies": { - "min-indent": "^1.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/react-dom": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", - "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", - "dev": true, - "license": "MIT", - "dependencies": { - 
"loose-envify": "^1.1.0", - "scheduler": "^0.23.2" - }, - "peerDependencies": { - "react": "^18.3.1" - } - }, - "node_modules/react-is": { - "version": "17.0.2", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-17.0.2.tgz", - "integrity": "sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==", - "dev": true, - "license": "MIT" - }, - "node_modules/react-remove-scroll": { - "version": "2.7.1", - "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.7.1.tgz", - "integrity": "sha512-HpMh8+oahmIdOuS5aFKKY6Pyog+FNaZV/XyJOq7b4YFwsFHe5yYfdbIalI4k3vU2nSDql7YskmUseHsRrJqIPA==", - "dev": true, - "license": "MIT", - "dependencies": { - "react-remove-scroll-bar": "^2.3.7", - "react-style-singleton": "^2.2.3", - "tslib": "^2.1.0", - "use-callback-ref": "^1.3.3", - "use-sidecar": "^1.1.3" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/react-remove-scroll-bar": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.8.tgz", - "integrity": "sha512-9r+yi9+mgU33AKcj6IbT9oRCO78WriSj6t/cF8DWBZJ9aOGPOTEDvdUDz1FwKim7QXWwmHqtdHnRJfhAxEG46Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "react-style-singleton": "^2.2.2", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/react-style-singleton": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.3.tgz", - "integrity": "sha512-b6jSvxvVnyptAiLjbkWLE/lOnR4lfTtDAl+eUC7RZy+QQWc6wRzIV2CE6xBuMmDxc2qIihtDCZD5NPOFl7fRBQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "get-nonce": "^1.0.0", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/read-cache": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", - "integrity": "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA==", - "dev": true, - "license": "MIT", - "dependencies": { - "pify": "^2.3.0" - } - }, - "node_modules/read-yaml-file": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/read-yaml-file/-/read-yaml-file-2.1.0.tgz", - "integrity": "sha512-UkRNRIwnhG+y7hpqnycCL/xbTk7+ia9VuVTC0S+zVbwd65DI9eUpRMfsWIGrCWxTU/mi+JW8cHQCrv+zfCbEPQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "js-yaml": "^4.0.0", - "strip-bom": "^4.0.0" - }, - "engines": { - "node": ">=10.13" - } - }, - "node_modules/read-yaml-file/node_modules/strip-bom": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-4.0.0.tgz", - "integrity": "sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/readdirp": { - "version": "3.6.0", - "resolved": 
"https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", - "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", - "dev": true, - "license": "MIT", - "dependencies": { - "picomatch": "^2.2.1" - }, - "engines": { - "node": ">=8.10.0" - } - }, - "node_modules/recast": { - "version": "0.23.11", - "resolved": "https://registry.npmjs.org/recast/-/recast-0.23.11.tgz", - "integrity": "sha512-YTUo+Flmw4ZXiWfQKGcwwc11KnoRAYgzAE2E7mXKCjSviTKShtxBsN6YUUBB2gtaBzKzeKunxhUwNHQuRryhWA==", - "dev": true, - "license": "MIT", - "dependencies": { - "ast-types": "^0.16.1", - "esprima": "~4.0.0", - "source-map": "~0.6.1", - "tiny-invariant": "^1.3.3", - "tslib": "^2.0.1" - }, - "engines": { - "node": ">= 4" - } - }, - "node_modules/redent": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/redent/-/redent-3.0.0.tgz", - "integrity": "sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg==", - "dev": true, - "license": "MIT", - "dependencies": { - "indent-string": "^4.0.0", - "strip-indent": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/resolve": { - "version": "1.22.10", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.10.tgz", - "integrity": "sha512-NPRy+/ncIMeDlTAsuqwKIiferiawhefFJtkNSW0qZJEqMEb+qBt/77B/jGeeek+F0uOeN05CDa6HXbbIgtVX4w==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-core-module": "^2.16.0", - "path-parse": "^1.0.7", - "supports-preserve-symlinks-flag": "^1.0.0" - }, - "bin": { - "resolve": "bin/resolve" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/resolve-from": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", - "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/restore-cursor": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-5.1.0.tgz", - "integrity": "sha512-oMA2dcrw6u0YfxJQXm342bFKX/E4sG9rbTzO9ptUcR/e8A33cHuvStiYOwH7fszkZlZ1z/ta9AAoPk2F4qIOHA==", - "dev": true, - "license": "MIT", - "dependencies": { - "onetime": "^7.0.0", - "signal-exit": "^4.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/reusify": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz", - "integrity": "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==", - "dev": true, - "license": "MIT", - "engines": { - "iojs": ">=1.0.0", - "node": ">=0.10.0" - } - }, - "node_modules/run-parallel": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", - "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "queue-microtask": "^1.2.2" - } - }, - "node_modules/scheduler": { - "version": "0.23.2", - "resolved": 
"https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", - "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "loose-envify": "^1.1.0" - } - }, - "node_modules/semver": { - "version": "7.7.2", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz", - "integrity": "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==", - "dev": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, - "license": "MIT", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/siginfo": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz", - "integrity": "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==", - "dev": true, - "license": "ISC" - }, - "node_modules/signal-exit": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", - "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/sirv": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/sirv/-/sirv-3.0.2.tgz", - "integrity": "sha512-2wcC/oGxHis/BoHkkPwldgiPSYcpZK3JU28WoMVv55yHJgcZ8rlXvuG9iZggz+sU1d4bRgIGASwyWqjxu3FM0g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@polka/url": "^1.0.0-next.24", - "mrmime": "^2.0.0", - "totalist": "^3.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/sisteransi": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz", - "integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==", - "dev": true, - "license": "MIT" - }, - "node_modules/slash": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/slash/-/slash-5.1.0.tgz", - "integrity": "sha512-ZA6oR3T/pEyuqwMgAKT0/hAv8oAXckzbkmR0UkUosQ+Mc4RxGoJkRmwHgHufaenlyAgE1Mxgpdcrf75y6XcnDg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true, - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/source-map-js": { - "version": "1.2.1", - "resolved": 
"https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", - "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", - "dev": true, - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/stackback": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", - "integrity": "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==", - "dev": true, - "license": "MIT" - }, - "node_modules/std-env": { - "version": "3.9.0", - "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.9.0.tgz", - "integrity": "sha512-UGvjygr6F6tpH7o2qyqR6QYpwraIjKSdtzyBdyytFOHmPZY917kwdwLG0RbOjWOnKmnm3PeHjaoLLMie7kPLQw==", - "dev": true, - "license": "MIT" - }, - "node_modules/stdin-discarder": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/stdin-discarder/-/stdin-discarder-0.2.2.tgz", - "integrity": "sha512-UhDfHmA92YAlNnCfhmq0VeNL5bDbiZGg7sZ2IvPsXubGkiNa9EC+tUTsjBRsYUAz87btI6/1wf4XoVvQ3uRnmQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/storybook": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/storybook/-/storybook-9.1.5.tgz", - "integrity": "sha512-cGwJ2AE6nxlwqQlOiI+HKX5qa7+FOV7Ha7Qa+GoASBIQSSnLfbY6UldgAxHCJGJOFtgW/wuqfDtNvni6sj1/OQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@storybook/global": "^5.0.0", - "@testing-library/jest-dom": "^6.6.3", - "@testing-library/user-event": "^14.6.1", - "@vitest/expect": "3.2.4", - "@vitest/mocker": "3.2.4", - "@vitest/spy": "3.2.4", - "better-opn": "^3.0.2", - "esbuild": "^0.18.0 || ^0.19.0 || ^0.20.0 || ^0.21.0 || ^0.22.0 || ^0.23.0 || ^0.24.0 || ^0.25.0", - "esbuild-register": "^3.5.0", - "recast": "^0.23.5", - "semver": "^7.6.2", - "ws": "^8.18.0" - }, - "bin": { - "storybook": "bin/index.cjs" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "prettier": "^2 || ^3" - }, - "peerDependenciesMeta": { - "prettier": { - "optional": true - } - } - }, - "node_modules/string-width": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", - "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", - "dev": true, - "license": "MIT", - "dependencies": { - "eastasianwidth": "^0.2.0", - "emoji-regex": "^9.2.2", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/string-width-cjs": { - "name": "string-width", - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, - "license": "MIT" 
- }, - "node_modules/string-width-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", - "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^6.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/strip-ansi?sponsor=1" - } - }, - "node_modules/strip-ansi-cjs": { - "name": "strip-ansi", - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi/node_modules/ansi-regex": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz", - "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-regex?sponsor=1" - } - }, - "node_modules/strip-bom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", - "integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/strip-indent": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-3.0.0.tgz", - "integrity": "sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "min-indent": "^1.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-literal": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-literal/-/strip-literal-3.0.0.tgz", - "integrity": "sha512-TcccoMhJOM3OebGhSBEmp3UZ2SfDMZUEBdRA/9ynfLi8yYajyWX3JiXArcJt4Umh4vISpspkQIY8ZZoCqjbviA==", - "dev": true, - "license": "MIT", - "dependencies": { - "js-tokens": "^9.0.1" - }, - "funding": { - "url": "https://github.com/sponsors/antfu" - } - }, - "node_modules/strip-literal/node_modules/js-tokens": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-9.0.1.tgz", - "integrity": "sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/sucrase": { - "version": "3.35.0", - "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.0.tgz", - "integrity": "sha512-8EbVDiu9iN/nESwxeSxDKe0dunta1GOlHufmSSXxMD2z2/tMZpDMpvXQGsc+ajGo8y2uYUmixaSRUc/QPoQ0GA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/gen-mapping": "^0.3.2", - "commander": "^4.0.0", - "glob": "^10.3.10", - "lines-and-columns": "^1.1.6", - "mz": "^2.7.0", - 
"pirates": "^4.0.1", - "ts-interface-checker": "^0.1.9" - }, - "bin": { - "sucrase": "bin/sucrase", - "sucrase-node": "bin/sucrase-node" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/supports-preserve-symlinks-flag": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", - "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/syncpack": { - "version": "13.0.4", - "resolved": "https://registry.npmjs.org/syncpack/-/syncpack-13.0.4.tgz", - "integrity": "sha512-kJ9VlRxNCsBD5pJAE29oXeBYbPLhEySQmK4HdpsLv81I6fcDDW17xeJqMwiU3H7/woAVsbgq25DJNS8BeiN5+w==", - "dev": true, - "license": "MIT", - "dependencies": { - "chalk": "^5.4.1", - "chalk-template": "^1.1.0", - "commander": "^13.1.0", - "cosmiconfig": "^9.0.0", - "effect": "^3.13.7", - "enquirer": "^2.4.1", - "fast-check": "^3.23.2", - "globby": "^14.1.0", - "jsonc-parser": "^3.3.1", - "minimatch": "9.0.5", - "npm-package-arg": "^12.0.2", - "ora": "^8.2.0", - "prompts": "^2.4.2", - "read-yaml-file": "^2.1.0", - "semver": "^7.7.1", - "tightrope": "0.2.0", - "ts-toolbelt": "^9.6.0" - }, - "bin": { - "syncpack": "dist/bin.js", - "syncpack-fix-mismatches": "dist/bin-fix-mismatches/index.js", - "syncpack-format": "dist/bin-format/index.js", - "syncpack-lint": "dist/bin-lint/index.js", - "syncpack-lint-semver-ranges": "dist/bin-lint-semver-ranges/index.js", - "syncpack-list": "dist/bin-list/index.js", - "syncpack-list-mismatches": "dist/bin-list-mismatches/index.js", - "syncpack-prompt": "dist/bin-prompt/index.js", - "syncpack-set-semver-ranges": "dist/bin-set-semver-ranges/index.js", - "syncpack-update": "dist/bin-update/index.js" - }, - "engines": { - "node": ">=18.18.0" - } - }, - "node_modules/syncpack/node_modules/commander": { - "version": "13.1.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-13.1.0.tgz", - "integrity": "sha512-/rFeCpNJQbhSZjGVwO9RFV3xPqbnERS8MmIQzCtD/zl6gpJuV/bMLuN92oG3F7d8oDEHHRrujSXNUr8fpjntKw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - } - }, - "node_modules/tailwind-merge": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.3.1.tgz", - "integrity": "sha512-gBXpgUm/3rp1lMZZrM/w7D8GKqshif0zAymAhbCyIt8KMe+0v9DQ7cdYLR4FHH/cKpdTXb+A/tKKU3eolfsI+g==", - "dev": true, - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/dcastil" - } - }, - "node_modules/tailwind-scrollbar": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/tailwind-scrollbar/-/tailwind-scrollbar-4.0.2.tgz", - "integrity": "sha512-wAQiIxAPqk0MNTPptVe/xoyWi27y+NRGnTwvn4PQnbvB9kp8QUBiGl/wsfoVBHnQxTmhXJSNt9NHTmcz9EivFA==", - "dev": true, - "license": "MIT", - "dependencies": { - "prism-react-renderer": "^2.4.1" - }, - "engines": { - "node": ">=12.13.0" - }, - "peerDependencies": { - "tailwindcss": "4.x" - } - }, - "node_modules/tailwindcss": { - "version": "3.4.17", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.17.tgz", - "integrity": "sha512-w33E2aCvSDP0tW9RZuNXadXlkHXqFzSkQew/aIa2i/Sj8fThxwovwlXHSPXTbAHwEIhBFXAedUhP2tueAKP8Og==", - "dev": true, - "license": "MIT", - "dependencies": { - "@alloc/quick-lru": "^5.2.0", - "arg": "^5.0.2", - 
"chokidar": "^3.6.0", - "didyoumean": "^1.2.2", - "dlv": "^1.1.3", - "fast-glob": "^3.3.2", - "glob-parent": "^6.0.2", - "is-glob": "^4.0.3", - "jiti": "^1.21.6", - "lilconfig": "^3.1.3", - "micromatch": "^4.0.8", - "normalize-path": "^3.0.0", - "object-hash": "^3.0.0", - "picocolors": "^1.1.1", - "postcss": "^8.4.47", - "postcss-import": "^15.1.0", - "postcss-js": "^4.0.1", - "postcss-load-config": "^4.0.2", - "postcss-nested": "^6.2.0", - "postcss-selector-parser": "^6.1.2", - "resolve": "^1.22.8", - "sucrase": "^3.35.0" - }, - "bin": { - "tailwind": "lib/cli.js", - "tailwindcss": "lib/cli.js" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/tailwindcss/node_modules/postcss-selector-parser": { - "version": "6.1.2", - "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz", - "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", - "dev": true, - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "util-deprecate": "^1.0.2" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/thenify": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", - "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", - "dev": true, - "license": "MIT", - "dependencies": { - "any-promise": "^1.0.0" - } - }, - "node_modules/thenify-all": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", - "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", - "dev": true, - "license": "MIT", - "dependencies": { - "thenify": ">= 3.1.0 < 4" - }, - "engines": { - "node": ">=0.8" - } - }, - "node_modules/tightrope": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/tightrope/-/tightrope-0.2.0.tgz", - "integrity": "sha512-Kw36UHxJEELq2VUqdaSGR2/8cAsPgMtvX8uGVU6Jk26O66PhXec0A5ZnRYs47btbtwPDpXXF66+Fo3vimCM9aQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=16" - } - }, - "node_modules/tiny-invariant": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz", - "integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==", - "dev": true, - "license": "MIT" - }, - "node_modules/tinybench": { - "version": "2.9.0", - "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz", - "integrity": "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==", - "dev": true, - "license": "MIT" - }, - "node_modules/tinyexec": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/tinyexec/-/tinyexec-0.3.2.tgz", - "integrity": "sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==", - "dev": true, - "license": "MIT" - }, - "node_modules/tinyglobby": { - "version": "0.2.14", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz", - "integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "fdir": "^6.4.4", - "picomatch": "^4.0.2" - }, - "engines": { - "node": ">=12.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/SuperchupuDev" - } - }, - "node_modules/tinyglobby/node_modules/fdir": { - "version": "6.5.0", - 
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", - "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12.0.0" - }, - "peerDependencies": { - "picomatch": "^3 || ^4" - }, - "peerDependenciesMeta": { - "picomatch": { - "optional": true - } - } - }, - "node_modules/tinyglobby/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/tinypool": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/tinypool/-/tinypool-1.1.1.tgz", - "integrity": "sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.0.0 || >=20.0.0" - } - }, - "node_modules/tinyrainbow": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/tinyrainbow/-/tinyrainbow-2.0.0.tgz", - "integrity": "sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/tinyspy": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/tinyspy/-/tinyspy-4.0.3.tgz", - "integrity": "sha512-t2T/WLB2WRgZ9EpE4jgPJ9w+i66UZfDc8wHh0xrwiRNN+UwH98GIJkTeZqX9rg0i0ptwzqW+uYeIF0T4F8LR7A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/to-regex-range": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", - "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-number": "^7.0.0" - }, - "engines": { - "node": ">=8.0" - } - }, - "node_modules/totalist": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/totalist/-/totalist-3.0.1.tgz", - "integrity": "sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/ts-api-utils": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.1.0.tgz", - "integrity": "sha512-CUgTZL1irw8u29bzrOD/nH85jqyc74D6SshFgujOIA7osm2Rz7dYH77agkx7H4FBNxDq7Cjf+IjaX/8zwFW+ZQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18.12" - }, - "peerDependencies": { - "typescript": ">=4.8.4" - } - }, - "node_modules/ts-dedent": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/ts-dedent/-/ts-dedent-2.2.0.tgz", - "integrity": "sha512-q5W7tVM71e2xjHZTlgfTDoPF/SmqKG5hddq9SzR49CH2hayqRKJtQ4mtRlSxKaJlR/+9rEM+mnBHf7I2/BQcpQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.10" - } - }, - "node_modules/ts-interface-checker": { - "version": "0.1.13", - "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz", - "integrity": "sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==", - "dev": true, - "license": "Apache-2.0" - }, - "node_modules/ts-toolbelt": { - 
"version": "9.6.0", - "resolved": "https://registry.npmjs.org/ts-toolbelt/-/ts-toolbelt-9.6.0.tgz", - "integrity": "sha512-nsZd8ZeNUzukXPlJmTBwUAuABDe/9qtVDelJeT/qW0ow3ZS3BsQJtNkan1802aM9Uf68/Y8ljw86Hu0h5IUW3w==", - "dev": true, - "license": "Apache-2.0" - }, - "node_modules/tsconfig-paths": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-4.2.0.tgz", - "integrity": "sha512-NoZ4roiN7LnbKn9QqE1amc9DJfzvZXxF4xDavcOWt1BPkdx+m+0gJuPM+S0vCe7zTJMYUP0R8pO2XMr+Y8oLIg==", - "dev": true, - "license": "MIT", - "dependencies": { - "json5": "^2.2.2", - "minimist": "^1.2.6", - "strip-bom": "^3.0.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "dev": true, - "license": "0BSD" - }, - "node_modules/typescript": { - "resolved": "../../node_modules/.pnpm/typescript@5.8.3/node_modules/typescript", - "link": true - }, - "node_modules/typescript-eslint": { - "resolved": "../../node_modules/.pnpm/typescript-eslint@8.38.0_eslint@9.31.0_jiti@2.4.2__typescript@5.8.3/node_modules/typescript-eslint", - "link": true - }, - "node_modules/undici-types": { - "version": "6.21.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", - "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/unicorn-magic": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/unicorn-magic/-/unicorn-magic-0.1.0.tgz", - "integrity": "sha512-lRfVq8fE8gz6QMBuDM6a+LO3IAzTi05H6gCVaUpir2E1Rwpo4ZUog45KpNXKC/Mn3Yb9UDuHumeFTo9iV/D9FQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/universalify": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", - "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 10.0.0" - } - }, - "node_modules/unplugin": { - "version": "1.16.1", - "resolved": "https://registry.npmjs.org/unplugin/-/unplugin-1.16.1.tgz", - "integrity": "sha512-4/u/j4FrCKdi17jaxuJA0jClGxB1AvU2hw/IuayPc4ay1XGaJs/rbb4v5WKwAjNifjmXK9PIFyuPiaK8azyR9w==", - "dev": true, - "license": "MIT", - "dependencies": { - "acorn": "^8.14.0", - "webpack-virtual-modules": "^0.6.2" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/update-browserslist-db": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.3.tgz", - "integrity": "sha512-UxhIZQ+QInVdunkDAaiazvvT/+fXL5Osr0JZlJulepYu6Jd7qJtDZjlur0emRlT71EN3ScPoE7gvsuIKKNavKw==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/browserslist" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "escalade": "^3.2.0", - "picocolors": "^1.1.1" - }, - "bin": { - "update-browserslist-db": "cli.js" - }, - "peerDependencies": { - "browserslist": ">= 4.21.0" - } - }, - 
"node_modules/use-callback-ref": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.3.tgz", - "integrity": "sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg==", - "dev": true, - "license": "MIT", - "dependencies": { - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/use-sidecar": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.3.tgz", - "integrity": "sha512-Fedw0aZvkhynoPYlA5WXrMCAMm+nSWdZt6lzJQ7Ok8S6Q+VsHmHpRWndVRJ8Be0ZbkfPc5LRYH+5XrzXcEeLRQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "detect-node-es": "^1.1.0", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/use-sync-external-store": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.5.0.tgz", - "integrity": "sha512-Rb46I4cGGVBmjamjphe8L/UnvJD+uPPtTkNvX5mZgqdbavhI4EbgIWJiIHXJ8bc/i9EQGPRh4DwEURJ552Do0A==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" - } - }, - "node_modules/util-deprecate": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", - "dev": true, - "license": "MIT" - }, - "node_modules/validate-npm-package-name": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/validate-npm-package-name/-/validate-npm-package-name-6.0.2.tgz", - "integrity": "sha512-IUoow1YUtvoBBC06dXs8bR8B9vuA3aJfmQNKMoaPG/OFsPmoQvw8xh+6Ye25Gx9DQhoEom3Pcu9MKHerm/NpUQ==", - "dev": true, - "license": "ISC", - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/vite": { - "resolved": "../../node_modules/.pnpm/vite@6.3.5_@types+node@24.1.0_jiti@2.4.2_lightningcss@1.30.1_terser@5.43.1_tsx@4.20.3_yaml@2.8.0/node_modules/vite", - "link": true - }, - "node_modules/vite-node": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/vite-node/-/vite-node-3.2.4.tgz", - "integrity": "sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==", - "dev": true, - "license": "MIT", - "dependencies": { - "cac": "^6.7.14", - "debug": "^4.4.1", - "es-module-lexer": "^1.7.0", - "pathe": "^2.0.3", - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0" - }, - "bin": { - "vite-node": "vite-node.mjs" - }, - "engines": { - "node": "^18.0.0 || ^20.0.0 || >=22.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/vite-plugin-dts": { - "resolved": "../../node_modules/.pnpm/vite-plugin-dts@4.5.4_@types+node@24.1.0_rollup@4.45.1_typescript@5.8.3_vite@6.3.5_@types+nod_ddgp24sr5pf6ze3b5hs7mrzr5e/node_modules/vite-plugin-dts", - "link": true - }, - "node_modules/vite-plugin-static-copy": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/vite-plugin-static-copy/-/vite-plugin-static-copy-3.1.2.tgz", - "integrity": 
"sha512-aVmYOzptLVOI2b1jL+cmkF7O6uhRv1u5fvOkQgbohWZp2CbR22kn9ZqkCUIt9umKF7UhdbsEpshn1rf4720QFg==", - "dev": true, - "license": "MIT", - "dependencies": { - "chokidar": "^3.6.0", - "fs-extra": "^11.3.0", - "p-map": "^7.0.3", - "picocolors": "^1.1.1", - "tinyglobby": "^0.2.14" - }, - "engines": { - "node": "^18.0.0 || >=20.0.0" - }, - "peerDependencies": { - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0" - } - }, - "node_modules/vitest": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", - "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/chai": "^5.2.2", - "@vitest/expect": "3.2.4", - "@vitest/mocker": "3.2.4", - "@vitest/pretty-format": "^3.2.4", - "@vitest/runner": "3.2.4", - "@vitest/snapshot": "3.2.4", - "@vitest/spy": "3.2.4", - "@vitest/utils": "3.2.4", - "chai": "^5.2.0", - "debug": "^4.4.1", - "expect-type": "^1.2.1", - "magic-string": "^0.30.17", - "pathe": "^2.0.3", - "picomatch": "^4.0.2", - "std-env": "^3.9.0", - "tinybench": "^2.9.0", - "tinyexec": "^0.3.2", - "tinyglobby": "^0.2.14", - "tinypool": "^1.1.1", - "tinyrainbow": "^2.0.0", - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0", - "vite-node": "3.2.4", - "why-is-node-running": "^2.3.0" - }, - "bin": { - "vitest": "vitest.mjs" - }, - "engines": { - "node": "^18.0.0 || ^20.0.0 || >=22.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - }, - "peerDependencies": { - "@edge-runtime/vm": "*", - "@types/debug": "^4.1.12", - "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", - "@vitest/browser": "3.2.4", - "@vitest/ui": "3.2.4", - "happy-dom": "*", - "jsdom": "*" - }, - "peerDependenciesMeta": { - "@edge-runtime/vm": { - "optional": true - }, - "@types/debug": { - "optional": true - }, - "@types/node": { - "optional": true - }, - "@vitest/browser": { - "optional": true - }, - "@vitest/ui": { - "optional": true - }, - "happy-dom": { - "optional": true - }, - "jsdom": { - "optional": true - } - } - }, - "node_modules/vitest/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/webpack-virtual-modules": { - "version": "0.6.2", - "resolved": "https://registry.npmjs.org/webpack-virtual-modules/-/webpack-virtual-modules-0.6.2.tgz", - "integrity": "sha512-66/V2i5hQanC51vBQKPH4aI8NMAcBW59FVBs+rC7eGHupMyfn34q7rZIE+ETlJ+XTevqfUhVVBgSUNSW2flEUQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, - "license": "ISC", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/why-is-node-running": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/why-is-node-running/-/why-is-node-running-2.3.0.tgz", - "integrity": "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==", - "dev": true, - "license": "MIT", - "dependencies": { - "siginfo": 
"^2.0.0", - "stackback": "0.0.2" - }, - "bin": { - "why-is-node-running": "cli.js" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", - "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.1.0", - "string-width": "^5.0.1", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs": { - "name": "wrap-ansi", - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, - "license": "MIT" - }, - "node_modules/wrap-ansi-cjs/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/ansi-styles": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", - "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/ws": { - "version": "8.18.3", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", - "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", - "dev": true, - "license": "MIT", - 
"engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, - "node_modules/yallist": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", - "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", - "dev": true, - "license": "ISC" - }, - "node_modules/yaml": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.1.tgz", - "integrity": "sha512-lcYcMxX2PO9XMGvAJkJ3OsNMw+/7FKes7/hgerGUYWIoWu5j/+YQqcZr5JnPZWzOsEBgMbSbiSTn/dv/69Mkpw==", - "dev": true, - "license": "ISC", - "bin": { - "yaml": "bin.mjs" - }, - "engines": { - "node": ">= 14.6" - } - }, - "node_modules/yocto-queue": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.1.tgz", - "integrity": "sha512-AyeEbWOu/TAXdxlV9wmGcR0+yh2j3vYPGOECcIj2S7MkrLyC7ne+oye2BKTItt0ii2PHk4cDy+95+LshzbXnGg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12.20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/zustand": { - "version": "4.5.7", - "resolved": "https://registry.npmjs.org/zustand/-/zustand-4.5.7.tgz", - "integrity": "sha512-CHOUy7mu3lbD6o6LJLfllpjkzhHXSBlX8B9+qPddUsIfeF5S/UZ5q0kmCsnRqT1UHFQZchNFDDzMbQsuesHWlw==", - "dev": true, - "license": "MIT", - "dependencies": { - "use-sync-external-store": "^1.2.2" - }, - "engines": { - "node": ">=12.7.0" - }, - "peerDependencies": { - "@types/react": ">=16.8", - "immer": ">=9.0.6", - "react": ">=16.8" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "immer": { - "optional": true - }, - "react": { - "optional": true - } - } - } - } -} diff --git a/web/common/package.json b/web/common/package.json index 5f869c8a25..ef91337174 100644 --- a/web/common/package.json +++ b/web/common/package.json @@ -2,46 +2,55 @@ "name": "@tobikodata/sqlmesh-common", "version": "0.0.1", "devDependencies": { - "@eslint/js": "^9.31.0", - "@radix-ui/react-slot": "^1.2.3", - "@radix-ui/react-tooltip": "^1.2.8", - "@storybook/addon-docs": "^9.1.5", - "@storybook/react-vite": "^9.1.5", - "@tailwindcss/typography": "^0.5.16", - "@tanstack/react-virtual": "^3.13.12", - "@testing-library/dom": "^10.4.1", - "@testing-library/jest-dom": "^6.6.3", - "@testing-library/react": "^16.3.0", - "@types/node": "^20.11.25", - "@types/react": "^18.3.23", - "@types/react-dom": "^18.3.7", - "@vitejs/plugin-react": "^4.7.0", - "@vitest/browser": "^3.2.4", - "@xyflow/react": "^12.8.4", - "autoprefixer": "^10.4.21", - "class-variance-authority": "^0.7.1", - "clsx": "^2.1.1", - "eslint": "^9.31.0", - "eslint-plugin-react-hooks": "^5.2.0", - "eslint-plugin-storybook": "^9.1.5", - "fuse.js": "^7.1.0", - "globals": "^16.3.0", - "lucide-react": "^0.542.0", - "playwright": "^1.54.1", - "postcss": "^8.5.6", - "react": "^18.3.1", - "react-dom": "^18.3.1", - "storybook": "^9.1.5", - "syncpack": "^13.0.4", - "tailwind-merge": "^3.3.1", - "tailwind-scrollbar": "^3.1.0", - "tailwindcss": "^3.4.17", - "typescript": "^5.8.3", - "typescript-eslint": "^8.38.0", - "vite": "^6.3.5", - "vite-plugin-dts": "^4.5.4", - "vite-plugin-static-copy": "^3.1.1", - "vitest": "^3.2.4" + "@eslint/js": "9.31.0", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-tooltip": "1.2.8", + "@storybook/addon-docs": "9.1.5", + 
"@storybook/react-vite": "9.1.5", + "@tailwindcss/typography": "0.5.16", + "@tanstack/react-virtual": "3.13.12", + "@testing-library/dom": "10.4.1", + "@testing-library/jest-dom": "6.6.3", + "@testing-library/react": "16.3.0", + "@testing-library/user-event": "14.6.1", + "@types/dagre": "0.7.53", + "@types/lodash": "4.17.20", + "@types/node": "20.11.25", + "@types/react": "18.3.23", + "@types/react-dom": "18.3.7", + "@vitejs/plugin-react": "4.7.0", + "@vitest/browser": "3.2.4", + "@xyflow/react": "12.8.4", + "autoprefixer": "10.4.21", + "browserslist": "4.26.2", + "caniuse-lite": "1.0.30001746", + "class-variance-authority": "0.7.1", + "clsx": "2.1.1", + "cronstrue": "3.3.0", + "dagre": "0.8.5", + "deepmerge": "4.3.1", + "eslint": "9.31.0", + "eslint-plugin-react-hooks": "5.2.0", + "eslint-plugin-storybook": "9.1.5", + "fuse.js": "7.1.0", + "globals": "16.3.0", + "lodash": "4.17.21", + "lucide-react": "0.542.0", + "playwright": "1.54.1", + "postcss": "8.5.6", + "react": "18.3.1", + "react-dom": "18.3.1", + "storybook": "9.1.5", + "syncpack": "13.0.4", + "tailwind-merge": "3.3.1", + "tailwind-scrollbar": "3.1.0", + "tailwindcss": "3.4.17", + "typescript": "5.8.3", + "typescript-eslint": "8.38.0", + "vite": "6.3.5", + "vite-plugin-dts": "4.5.4", + "vite-plugin-static-copy": "3.1.1", + "vitest": "3.2.4" }, "exports": { ".": { @@ -56,7 +65,17 @@ }, "./styles/*": "./dist/styles/*", "./design/*": "./dist/styles/design/*", - "./configs/*": "./dist/configs/*" + "./configs/*": "./dist/configs/*", + "./lineage": { + "import": { + "types": "./dist/lineage/index.d.ts", + "default": "./dist/lineage/index.es.js" + }, + "require": { + "types": "./dist/lineage/index.d.ts", + "default": "./dist/lineage/index.umd.js" + } + } }, "files": [ "/dist" @@ -65,20 +84,24 @@ "main": "dist/sqlmesh-common.umd.js", "module": "dist/sqlmesh-common.es.js", "peerDependencies": { - "@radix-ui/react-slot": "^1.2.3", - "@radix-ui/react-tooltip": "^1.2.8", - "@tailwindcss/typography": "^0.5.16", - "@tanstack/react-virtual": "^3.13.12", - "@xyflow/react": "^12.8.4", - "class-variance-authority": "^0.7.1", - "clsx": "^2.1.1", - "fuse.js": "^7.1.0", - "lucide-react": "^0.542.0", - "react": "^18.3.1", - "react-dom": "^18.3.1", - "tailwind-merge": "^3.3.1", - "tailwind-scrollbar": "^3.1.0", - "tailwindcss": "^3.4.17" + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-tooltip": "1.2.8", + "@tailwindcss/typography": "0.5.16", + "@tanstack/react-virtual": "3.13.12", + "@xyflow/react": "12.8.4", + "class-variance-authority": "0.7.1", + "clsx": "2.1.1", + "cronstrue": "3.3.0", + "dagre": "0.8.5", + "deepmerge": "4.3.1", + "fuse.js": "7.1.0", + "lodash": "4.17.21", + "lucide-react": "0.542.0", + "react": "18.3.1", + "react-dom": "18.3.1", + "tailwind-merge": "3.3.1", + "tailwind-scrollbar": "3.1.0", + "tailwindcss": "3.4.17" }, "private": false, "repository": "TobikoData/sqlmesh", diff --git a/web/common/src/components/CopyButton/CopyButton.tsx b/web/common/src/components/CopyButton/CopyButton.tsx index 45aae3d817..3647121f82 100644 --- a/web/common/src/components/CopyButton/CopyButton.tsx +++ b/web/common/src/components/CopyButton/CopyButton.tsx @@ -36,6 +36,7 @@ export const CopyButton = React.forwardRef( onClick={e => { e.stopPropagation() copyToClipboard(text) + onClick?.(e) }} disabled={disabled || !!isCopied} {...props} diff --git a/web/common/src/components/Input/Input.css b/web/common/src/components/Input/Input.css new file mode 100644 index 0000000000..0baae3c6bb --- /dev/null +++ b/web/common/src/components/Input/Input.css 
@@ -0,0 +1,7 @@ +:root { + --color-input-background: var(--color-light); + --color-input-background-translucid: var(--color-neutral-5); + --color-input-foreground: var(--color-prose); + --color-input-placeholder: var(--color-neutral-400); + --color-input-border: var(--color-neutral-300); +} diff --git a/web/common/src/components/Input/Input.tsx b/web/common/src/components/Input/Input.tsx index 5c25b0a698..10ba151ab4 100644 --- a/web/common/src/components/Input/Input.tsx +++ b/web/common/src/components/Input/Input.tsx @@ -3,6 +3,8 @@ import { cn } from '@/utils' import type { Size } from '@/types' import { cva } from 'class-variance-authority' +import './Input.css' + export interface InputProps extends React.ComponentProps<'input'> { inputSize?: Size } @@ -15,9 +17,9 @@ export const Input = React.forwardRef( className={cn( inputVariants({ size: inputSize }), 'border items-center border-input-border bg-input-background text-input-foreground transition-colors placeholder:text-input-placeholder', - 'file:border-0 file:h-fit file:bg-background-lucid file:rounded-sm file:flex-col file:mt-0.5', + 'file:border-0 file:h-fit file:bg-background-translucid file:rounded-sm file:flex-col file:mt-0.5', type === 'file' && - 'bg-input-background-lucid border-[transparent] pl-1', + 'bg-input-background-translucid border-[transparent] pl-1', className, )} ref={ref} diff --git a/web/common/src/components/Lineage/Lineage.css b/web/common/src/components/Lineage/Lineage.css new file mode 100644 index 0000000000..7855ced10a --- /dev/null +++ b/web/common/src/components/Lineage/Lineage.css @@ -0,0 +1,3 @@ +.react-flow__node { + height: auto !important; +} diff --git a/web/common/src/components/Lineage/LineageColumnLevel/ColumnLevelLineageContext.ts b/web/common/src/components/Lineage/LineageColumnLevel/ColumnLevelLineageContext.ts new file mode 100644 index 0000000000..227fc70394 --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/ColumnLevelLineageContext.ts @@ -0,0 +1,101 @@ +import React from 'react' + +import { type PortId } from '../utils' + +export type LineageColumn = { + source?: string | null + expression?: string | null + models: Record +} + +export type ColumnLevelModelConnections< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, +> = Record +export type ColumnLevelDetails< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, +> = Omit & { + models: ColumnLevelModelConnections< + TAdjacencyListKey, + TAdjacencyListColumnKey + > +} +export type ColumnLevelConnections< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, +> = Record< + TAdjacencyListColumnKey, + ColumnLevelDetails +> +export type ColumnLevelLineageAdjacencyList< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, +> = Record< + TAdjacencyListKey, + ColumnLevelConnections +> + +export type ColumnLevelLineageContextValue< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumnID extends string = PortId, +> = { + adjacencyListColumnLevel: ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > + selectedColumns: Set + columnLevelLineage: Map< + TColumnID, + ColumnLevelLineageAdjacencyList + > + setColumnLevelLineage: React.Dispatch< + React.SetStateAction< + Map< + TColumnID, + ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > + > + > + > + showColumns: boolean + setShowColumns: React.Dispatch> + fetchingColumns: Set + 
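+  // Column IDs whose column-level lineage request is still in flight; consumers
+  // can use this to render per-column loading states and offer cancellation
+  // (see FactoryColumn's isFetching/onCancel below).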
setFetchingColumns: React.Dispatch>> +} + +export function getColumnLevelLineageContextInitial< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumnID extends string = PortId, +>() { + return { + adjacencyListColumnLevel: {}, + columnLevelLineage: new Map< + TColumnID, + ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > + >(), + setColumnLevelLineage: () => {}, + showColumns: false, + setShowColumns: () => {}, + selectedColumns: new Set(), + fetchingColumns: new Set(), + setFetchingColumns: () => {}, + } as const +} + +export type ColumnLevelLineageContextHook< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumnID extends string = PortId, +> = () => ColumnLevelLineageContextValue< + TAdjacencyListKey, + TAdjacencyListColumnKey, + TColumnID +> diff --git a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx new file mode 100644 index 0000000000..7b5e9e0ae0 --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx @@ -0,0 +1,257 @@ +import { + AlertCircle, + CircleOff, + FileCode, + FileMinus, + Workflow, +} from 'lucide-react' +import React from 'react' + +import { cn } from '@/utils' +import { NodeBadge } from '../node/NodeBadge' +import { NodePort } from '../node/NodePort' +import { type NodeId, type PortId } from '../utils' +import { + type ColumnLevelLineageAdjacencyList, + type ColumnLevelLineageContextHook, +} from './ColumnLevelLineageContext' +import { Tooltip } from '@/components/Tooltip/Tooltip' +import { Metadata } from '@/components/Metadata/Metadata' +import { HorizontalContainer } from '@/components/HorizontalContainer/HorizontalContainer' +import { Information } from '@/components/Typography/Information' +import { LoadingContainer } from '@/components/LoadingContainer/LoadingContainer' + +export function FactoryColumn< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TNodeID extends string = NodeId, + TColumnID extends string = PortId, +>( + useLineage: ColumnLevelLineageContextHook< + TAdjacencyListKey, + TAdjacencyListColumnKey, + TColumnID + >, +) { + return React.memo(function FactoryColumn({ + id, + nodeId, + modelName, + name, + description, + type, + className, + data, + isFetching = false, + error, + renderError, + renderExpression, + renderSource, + onClick, + onCancel, + }: { + id: TColumnID + nodeId: TNodeID + modelName: TAdjacencyListKey + name: TAdjacencyListColumnKey + type: string + description?: string | null + className?: string + data?: ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > + isFetching?: boolean + error?: Error | null + renderError?: (error: Error) => React.ReactNode + renderExpression?: (expression: string) => React.ReactNode + renderSource?: ( + source: string, + expression?: string | null, + ) => React.ReactNode + onClick?: () => void + onCancel?: () => void + }) { + const { selectedColumns, adjacencyListColumnLevel, columnLevelLineage } = + useLineage() + + const column = adjacencyListColumnLevel?.[modelName]?.[name] + const currentColumnLineage = columnLevelLineage.get(id) + const isSelectedColumn = selectedColumns.has(id) + const isTriggeredColumn = + column != null && currentColumnLineage != null && isSelectedColumn + + // Column that has no upstream connections + const isSourceColumn = React.useMemo(() => { + if (data == null) return false + + 
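+      // A column only counts as a "source" (no upstream connections) when the
+      // fetched lineage contains exactly this model/column pair and that pair
+      // references no upstream models; each condition is checked below.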
const models = Object.values(data) + + console.assert( + data[modelName], + `Model: ${modelName} not found in column lineage data`, + ) + console.assert( + data[modelName][name], + `Column: ${name} for model: ${modelName} not found in column lineage data`, + ) + + const columns = Object.values(data[modelName]) + + if (models.length > 1 || columns.length > 1) return false + + const columnModels = data[modelName][name].models + + return Object.keys(columnModels).length === 0 + }, [data, modelName, name]) + + const isDisabledColumn = isSourceColumn && !isSelectedColumn + + function renderColumnStates() { + if (isFetching) return <> + if (error && renderError) + return ( + + } + side="left" + sideOffset={20} + delayDuration={0} + className="bg-lineage-model-column-error-background p-0" + > + {renderError(error)} + + ) + + return ( + <> + {isSourceColumn ? ( + + ) : ( + + )} + {column?.source && renderSource && ( + + } + side="left" + sideOffset={20} + className="p-0 min-w-[30rem] max-w-xl bg-lineage-model-column-source-background" + delayDuration={0} + > + {renderSource(column.source, column.expression)} + + )} + {column?.expression && renderExpression && ( + + } + side="left" + sideOffset={20} + className="p-0 min-w-[30rem] max-w-xl bg-lineage-model-column-expression-background" + delayDuration={0} + > + {renderExpression(column.expression)} + + )} + + ) + } + + function renderColumn() { + return ( + + + {renderColumnStates()} + {description ? ( + + + + ) : ( + + )} + + + } + value={{type}} + className={cn( + 'relative overflow-visible group p-0', + isDisabledColumn && 'cursor-not-allowed', + className, + )} + /> + ) + } + + function handleSelectColumn(e: React.MouseEvent) { + e.stopPropagation() + e.preventDefault() + + if (isFetching) { + onCancel?.() + } else if ((isSelectedColumn || isSourceColumn) && !isTriggeredColumn) { + return + } else { + onClick?.() + } + } + + return isSelectedColumn ? 
( + + {renderColumn()} + + ) : ( + renderColumn() + ) + }) +} + +function DisplayColumName({ name }: { name: string }) { + return ( + + {name} + + ) +} diff --git a/web/common/src/components/Lineage/LineageColumnLevel/help.ts b/web/common/src/components/Lineage/LineageColumnLevel/help.ts new file mode 100644 index 0000000000..30115450cd --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/help.ts @@ -0,0 +1,233 @@ +import { + toEdgeID, + toNodeID, + toPortID, + type LineageEdge, + type LineageEdgeData, + type EdgeId, + type NodeId, + type PortId, + type TransformEdgeFn, +} from '../utils' +import { + type ColumnLevelConnections, + type ColumnLevelDetails, + type ColumnLevelLineageAdjacencyList, +} from './ColumnLevelLineageContext' + +export const MAX_COLUMNS_TO_DISPLAY = 5 + +export function getAdjacencyListKeysFromColumnLineage< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, +>( + columnLineage: ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, +) { + const adjacencyListKeys = new Set() + + const targets = Object.entries(columnLineage) as [ + TAdjacencyListKey, + ColumnLevelConnections, + ][] + + for (const [sourceModelName, targetColumns] of targets) { + adjacencyListKeys.add(sourceModelName) + + const targetConnections = Object.entries(targetColumns) as [ + TAdjacencyListColumnKey, + ColumnLevelDetails, + ][] + + for (const [, { models: sourceModels }] of targetConnections) { + for (const targetModelName of Object.keys( + sourceModels, + ) as TAdjacencyListKey[]) { + adjacencyListKeys.add(targetModelName) + } + } + } + + return Array.from(adjacencyListKeys) +} + +export function getEdgesFromColumnLineage< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TEdgeID extends string = EdgeId, + TNodeID extends string = NodeId, + TPortID extends string = PortId, +>({ + columnLineage, + transformEdge, +}: { + columnLineage: ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > + transformEdge: TransformEdgeFn +}) { + const edges: LineageEdge[] = [] + const modelLevelEdgeIDs = new Map() + const targets = Object.entries(columnLineage || {}) as [ + TAdjacencyListKey, + ColumnLevelConnections, + ][] + + for (const [targetModelName, targetColumns] of targets) { + const targetConnections = Object.entries(targetColumns) as [ + TAdjacencyListColumnKey, + ColumnLevelDetails, + ][] + + const targetNodeId = toNodeID(targetModelName) + + for (const [ + targetColumnName, + { models: sourceModels }, + ] of targetConnections) { + const sources = Object.entries(sourceModels) as [ + TAdjacencyListKey, + TAdjacencyListColumnKey[], + ][] + + for (const [sourceModelName, sourceColumns] of sources) { + const sourceNodeId = toNodeID(sourceModelName) + + modelLevelEdgeIDs.set( + toEdgeID(sourceModelName, targetModelName), + [sourceNodeId, targetNodeId], + ) + + sourceColumns.forEach(sourceColumnName => { + const edgeId = toEdgeID( + sourceModelName, + sourceColumnName, + targetModelName, + targetColumnName, + ) + const sourceColumnId = toPortID( + sourceModelName, + sourceColumnName, + ) + const targetColumnId = toPortID( + targetModelName, + targetColumnName, + ) + + edges.push( + transformEdge( + 'port', + edgeId, + sourceNodeId, + targetNodeId, + sourceColumnId, + targetColumnId, + ), + ) + }) + } + } + } + + Array.from(modelLevelEdgeIDs.entries()).forEach( + ([edgeId, [sourceNodeId, targetNodeId]]) => { + 
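+        // Model-level edges are emitted after all port-level edges; keying the
+        // Map by edge ID guarantees one edge per (source, target) model pair
+        // even when many column pairs connect the same two models.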
edges.push(transformEdge('edge', edgeId, sourceNodeId, targetNodeId)) + }, + ) + return edges +} + +export function getConnectedColumnsIDs< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumnID extends string = PortId, +>( + adjacencyList: ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, +) { + const connectedColumns = new Set() + const targets = Object.entries(adjacencyList) as [ + TAdjacencyListKey, + ColumnLevelConnections, + ][] + + for (const [sourceModelName, targetColumns] of targets) { + const targetConnections = Object.entries(targetColumns) as [ + TAdjacencyListColumnKey, + ColumnLevelDetails, + ][] + + for (const [ + sourceColumnName, + { models: sourceModels }, + ] of targetConnections) { + connectedColumns.add(toPortID(sourceModelName, sourceColumnName)) + + const sources = Object.entries(sourceModels) as [ + TAdjacencyListKey, + TAdjacencyListColumnKey[], + ][] + + for (const [targetModelName, sourceColumns] of sources) { + sourceColumns.forEach(sourceColumnName => { + connectedColumns.add(toPortID(targetModelName, sourceColumnName)) + }) + } + } + } + return connectedColumns +} + +export function calculateNodeColumnsCount(columnsCount: number = 0) { + return Math.min(columnsCount, MAX_COLUMNS_TO_DISPLAY) +} + +export function calculateSelectedColumnsHeight( + selectedColumnsCount: number = 0, +) { + const selectedColumnsTopSeparatorHeight = 1 + const selectedColumnSeparatorHeight = 1 + const selectedColumnHeight = 24 // tailwind h-6 + const selectedColumnsSeparators = + selectedColumnsCount > 1 ? selectedColumnsCount - 1 : 0 + + return [ + selectedColumnsCount > 0 ? selectedColumnsTopSeparatorHeight : 0, + selectedColumnsCount * selectedColumnHeight, + selectedColumnsCount > 0 + ? selectedColumnsSeparators * selectedColumnSeparatorHeight + : 0, + ].reduce((acc, h) => acc + h, 0) +} + +export function calculateColumnsHeight({ + columnsCount = 0, + hasColumnsFilter = true, +}: { + columnsCount: number + hasColumnsFilter?: boolean +}) { + const hasColumns = columnsCount > 0 + const columnHeight = 24 // tailwind h-6 + const columnsTopSeparator = 1 + const columnSeparator = 1 + const columnsContainerPadding = 4 + const columnsPadding = 4 + const columnsFilterHeight = hasColumnsFilter && hasColumns ? columnHeight : 0 + const columnsSeparators = columnsCount > 1 ? columnsCount - 1 : 0 + + return [ + hasColumns ? columnsSeparators * columnSeparator : 0, + columnsCount * columnHeight, + hasColumns ? columnsPadding * 2 : 0, + hasColumns ? columnsContainerPadding * 2 : 0, + hasColumns ? columnsFilterHeight : 0, + hasColumns ? 
columnsTopSeparator : 0, + ].reduce((acc, height) => acc + height, 0) +} diff --git a/web/common/src/components/Lineage/LineageColumnLevel/useColumnLevelLineage.ts b/web/common/src/components/Lineage/LineageColumnLevel/useColumnLevelLineage.ts new file mode 100644 index 0000000000..da1a6b8ee8 --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/useColumnLevelLineage.ts @@ -0,0 +1,49 @@ +import merge from 'deepmerge' +import React from 'react' + +import { type PortId } from '../utils' +import { type ColumnLevelLineageAdjacencyList } from './ColumnLevelLineageContext' +import { + getAdjacencyListKeysFromColumnLineage, + getConnectedColumnsIDs, +} from './help' + +export function useColumnLevelLineage< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumnID extends string = PortId, +>( + columnLevelLineage: Map< + TColumnID, + ColumnLevelLineageAdjacencyList + >, +) { + const adjacencyListColumnLevel = React.useMemo(() => { + return merge.all(Array.from(columnLevelLineage.values()), { + arrayMerge: (dest, source) => Array.from(new Set([...dest, ...source])), + }) as ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > + }, [columnLevelLineage]) + + const selectedColumns = React.useMemo(() => { + return getConnectedColumnsIDs< + TAdjacencyListKey, + TAdjacencyListColumnKey, + TColumnID + >(adjacencyListColumnLevel) + }, [adjacencyListColumnLevel]) + + const adjacencyListKeysColumnLevel = React.useMemo(() => { + return adjacencyListColumnLevel != null + ? getAdjacencyListKeysFromColumnLineage(adjacencyListColumnLevel) + : [] + }, [adjacencyListColumnLevel]) + + return { + adjacencyListColumnLevel, + selectedColumns, + adjacencyListKeysColumnLevel, + } +} diff --git a/web/common/src/components/Lineage/LineageColumnLevel/useColumns.tsx b/web/common/src/components/Lineage/LineageColumnLevel/useColumns.tsx new file mode 100644 index 0000000000..3ed1278a5c --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/useColumns.tsx @@ -0,0 +1,58 @@ +import React from 'react' + +import { toPortID } from '../utils' +import { type PortId } from '../utils' + +export interface Column { + data_type: string + description?: string | null +} + +export function useColumns< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumn extends Column, + TColumnID extends string = PortId, +>( + selectedPorts: Set, + adjacencyListKey: TAdjacencyListKey, + rawColumns?: Record, +) { + const columnNames = React.useMemo(() => { + return new Set( + Object.keys(rawColumns ?? {}).map(column => + toPortID(adjacencyListKey, column), + ), + ) + }, [rawColumns, adjacencyListKey]) + + const [selectedColumns, columns] = React.useMemo(() => { + const selected = [] + const output = [] + + for (const [column, info] of Object.entries(rawColumns ?? 
{}) as [ + TAdjacencyListColumnKey, + TColumn, + ][]) { + const columnId = toPortID(adjacencyListKey, column) + const nodeColumn = { + name: column, + ...info, + id: columnId, + } + + if (selectedPorts.has(columnId)) { + selected.push(nodeColumn) + } else { + output.push(nodeColumn) + } + } + return [selected, output] + }, [rawColumns, adjacencyListKey, selectedPorts]) + + return { + columns, + columnNames, + selectedColumns, + } +} diff --git a/web/common/src/components/Lineage/LineageContext.ts b/web/common/src/components/Lineage/LineageContext.ts new file mode 100644 index 0000000000..6f4ee7e165 --- /dev/null +++ b/web/common/src/components/Lineage/LineageContext.ts @@ -0,0 +1,103 @@ +import React from 'react' + +import { + type EdgeId, + type LineageEdge, + type LineageEdgeData, + type LineageNode, + type LineageNodeData, + type LineageNodesMap, + type NodeId, + type PortId, + ZOOM_THRESHOLD, +} from './utils' + +export interface LineageContextValue< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +> { + // Node selection + showOnlySelectedNodes: boolean + setShowOnlySelectedNodes: React.Dispatch> + selectedNodes: Set + setSelectedNodes: React.Dispatch>> + selectedEdges: Set + setSelectedEdges: React.Dispatch>> + selectedNodeId: TNodeID | null + setSelectedNodeId: React.Dispatch> + + // Layout + isBuildingLayout: boolean + setIsBuildingLayout: React.Dispatch> + zoom: number + setZoom: React.Dispatch> + + // Nodes and Edges + edges: LineageEdge[] + setEdges: React.Dispatch< + React.SetStateAction[]> + > + nodes: LineageNode[] + nodesMap: LineageNodesMap + setNodesMap: React.Dispatch>> + currentNode: LineageNode | null +} + +export function getInitial< + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, +>() { + return { + showOnlySelectedNodes: false, + setShowOnlySelectedNodes: () => {}, + selectedNodes: new Set(), + setSelectedNodes: () => {}, + selectedEdges: new Set(), + setSelectedEdges: () => {}, + selectedNodeId: null, + setSelectedNodeId: () => {}, + zoom: ZOOM_THRESHOLD, + setZoom: () => {}, + edges: [], + setEdges: () => {}, + nodes: [], + nodesMap: {}, + setNodesMap: () => {}, + isBuildingLayout: false, + setIsBuildingLayout: () => {}, + currentNode: null, + } +} + +export type LineageContextHook< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +> = () => LineageContextValue + +export function createLineageContext< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, + TLineageContextValue extends LineageContextValue< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TPortID + > = LineageContextValue, +>(initial: TLineageContextValue) { + const LineageContext = React.createContext(initial) + + return { + Provider: LineageContext.Provider, + useLineage: () => React.useContext(LineageContext), + } +} diff --git a/web/common/src/components/Lineage/LineageControlButton.tsx b/web/common/src/components/Lineage/LineageControlButton.tsx new file mode 100644 index 0000000000..5f1abaa952 --- /dev/null +++ b/web/common/src/components/Lineage/LineageControlButton.tsx @@ -0,0 +1,43 @@ 
+import { ControlButton } from '@xyflow/react' + +import { cn } from '@/utils' +import { Tooltip } from '../Tooltip/Tooltip' + +export function LineageControlButton({ + text, + onClick, + disabled = false, + className, + children, +}: { + text: string + children: React.ReactNode + onClick?: (e: React.MouseEvent) => void + disabled?: boolean + className?: string +}) { + return ( + + + {children} + + + } + > + {text} + + ) +} diff --git a/web/common/src/components/Lineage/LineageControlIcon.tsx b/web/common/src/components/Lineage/LineageControlIcon.tsx new file mode 100644 index 0000000000..2c7f01e48c --- /dev/null +++ b/web/common/src/components/Lineage/LineageControlIcon.tsx @@ -0,0 +1,42 @@ +import React from 'react' + +import { cn } from '@/utils' + +export interface LineageControlIconProps extends React.SVGProps { + Icon: React.ElementType + size?: number + className?: string +} + +export const LineageControlIcon = React.forwardRef< + HTMLSpanElement, + LineageControlIconProps +>( + ( + { + Icon, + size = 16, + className, + ...props + }: { + Icon: React.ElementType + size?: number + className?: string + }, + ref, + ) => { + return ( + + ) + }, +) + +LineageControlIcon.displayName = 'LineageControlIcon' diff --git a/web/common/src/components/Lineage/LineageLayout.tsx b/web/common/src/components/Lineage/LineageLayout.tsx new file mode 100644 index 0000000000..411ace4e65 --- /dev/null +++ b/web/common/src/components/Lineage/LineageLayout.tsx @@ -0,0 +1,401 @@ +import { + Background, + BackgroundVariant, + Controls, + type EdgeTypes, + type NodeTypes, + ReactFlow, + ReactFlowProvider, + type SetCenter, + getConnectedEdges, + getIncomers, + getOutgoers, + useReactFlow, + useViewport, +} from '@xyflow/react' + +import '@xyflow/react/dist/style.css' +import './Lineage.css' + +import { debounce } from 'lodash' +import { CircuitBoard, Crosshair, LocateFixed, RotateCcw } from 'lucide-react' +import React from 'react' + +import { cn } from '@/utils' +import { type LineageContextHook } from './LineageContext' +import { LineageControlButton } from './LineageControlButton' +import { LineageControlIcon } from './LineageControlIcon' +import { + DEFAULT_ZOOM, + type LineageEdge, + type LineageEdgeData, + type LineageNode, + type LineageNodeData, + MAX_ZOOM, + MIN_ZOOM, + NODES_TRESHOLD, + NODES_TRESHOLD_ZOOM, + type NodeId, + type EdgeId, + ZOOM_THRESHOLD, + type PortId, +} from './utils' +import { VerticalContainer } from '../VerticalContainer/VerticalContainer' +import { MessageContainer } from '../MessageContainer/MessageContainer' +import { LoadingContainer } from '../LoadingContainer/LoadingContainer' + +export function LineageLayout< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +>({ + nodeTypes, + edgeTypes, + className, + controls, + useLineage, + onNodeClick, + onNodeDoubleClick, +}: { + useLineage: LineageContextHook< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TPortID + > + nodeTypes?: NodeTypes + edgeTypes?: EdgeTypes + className?: string + controls?: + | React.ReactNode + | (({ setCenter }: { setCenter: SetCenter }) => React.ReactNode) + onNodeClick?: ( + event: React.MouseEvent, + node: LineageNode, + ) => void + onNodeDoubleClick?: ( + event: React.MouseEvent, + node: LineageNode, + ) => void +}) { + return ( + + + + ) +} + +function LineageLayoutBase< + TNodeData extends LineageNodeData = LineageNodeData, + 
TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +>({ + nodeTypes, + edgeTypes, + className, + controls, + useLineage, + onNodeClick, + onNodeDoubleClick, +}: { + useLineage: LineageContextHook< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TPortID + > + nodeTypes?: NodeTypes + edgeTypes?: EdgeTypes + className?: string + controls?: + | React.ReactNode + | (({ setCenter }: { setCenter: SetCenter }) => React.ReactNode) + onNodeClick?: ( + event: React.MouseEvent, + node: LineageNode, + ) => void + onNodeDoubleClick?: ( + event: React.MouseEvent, + node: LineageNode, + ) => void +}) { + const { zoom: viewportZoom } = useViewport() + const { setCenter } = useReactFlow() + + const { + isBuildingLayout, + currentNode, + zoom, + nodes, + edges, + nodesMap, + showOnlySelectedNodes, + selectedNodeId, + setZoom, + setSelectedNodeId, + setShowOnlySelectedNodes, + setSelectedNodes, + setSelectedEdges, + } = useLineage() + + const updateZoom = React.useMemo(() => debounce(setZoom, 200), [setZoom]) + + const zoomToCurrentNode = React.useCallback( + (zoom: number = DEFAULT_ZOOM) => { + if (currentNode) { + setCenter(currentNode.position.x, currentNode.position.y, { + zoom, + duration: 0, + }) + } + }, + [currentNode, setCenter], + ) + + const zoomToSelectedNode = React.useCallback( + (zoom: number = DEFAULT_ZOOM) => { + const node = selectedNodeId ? nodesMap[selectedNodeId] : null + if (node) { + setCenter(node.position.x, node.position.y, { + zoom, + duration: 0, + }) + } + }, + [nodesMap, selectedNodeId, setCenter], + ) + + const getAllIncomers = React.useCallback( + ( + node: LineageNode, + visited: Set = new Set(), + ): LineageNode[] => { + if (visited.has(node.id)) return [] + + visited.add(node.id) + + return Array.from( + new Set>([ + node, + ...getIncomers(node, nodes, edges) + .map(n => getAllIncomers(n, visited)) + .flat(), + ]), + ) + }, + [nodes, edges], + ) + + const getAllOutgoers = React.useCallback( + ( + node: LineageNode, + visited: Set = new Set(), + ): LineageNode[] => { + if (visited.has(node.id)) return [] + + visited.add(node.id) + + return Array.from( + new Set>([ + node, + ...getOutgoers(node, nodes, edges) + .map(n => getAllOutgoers(n, visited)) + .flat(), + ]), + ) + }, + [nodes, edges], + ) + + React.useEffect(() => { + if (selectedNodeId == null) { + setShowOnlySelectedNodes(false) + setSelectedNodes(new Set()) + setSelectedEdges(new Set()) + + return + } + + const node = selectedNodeId ? 
nodesMap[selectedNodeId] : null + + if (node == null) { + setSelectedNodeId(null) + return + } + + const incomers = getAllIncomers(node) + const outgoers = getAllOutgoers(node) + const connectedNodes = [...incomers, ...outgoers] + + if (currentNode) { + connectedNodes.push(currentNode) + } + + const connectedEdges = getConnectedEdges< + LineageNode, + LineageEdge + >(connectedNodes, edges) + const selectedNodes = new Set(connectedNodes.map(node => node.id)) + const selectedEdges = new Set( + connectedEdges.reduce((acc, edge) => { + if ([edge.source, edge.target].every(id => selectedNodes.has(id))) { + edge.zIndex = 2 + acc.add(edge.id) + } else { + edge.zIndex = 1 + } + return acc + }, new Set()), + ) + + setSelectedNodes(selectedNodes) + setSelectedEdges(selectedEdges) + }, [ + currentNode, + selectedNodeId, + setSelectedNodes, + setSelectedEdges, + getAllIncomers, + getAllOutgoers, + setShowOnlySelectedNodes, + setSelectedNodeId, + ]) + + React.useEffect(() => { + if (selectedNodeId) { + zoomToSelectedNode(zoom) + } else { + zoomToCurrentNode(zoom) + } + }, [zoomToCurrentNode, zoomToSelectedNode]) + + React.useEffect(() => { + updateZoom(viewportZoom) + }, [updateZoom, viewportZoom]) + + React.useEffect(() => { + if (currentNode?.id) { + setSelectedNodeId(currentNode.id) + } else if (selectedNodeId) { + // setSelectedNodeId(selectedNodeId); + } else { + const node = nodes.length > 0 ? nodes[nodes.length - 1] : null + + if (node) { + setCenter(node.position.x, node.position.y, { + zoom: zoom, + duration: 0, + }) + } + } + }, [currentNode?.id, setSelectedNodeId, nodes, setCenter]) + + return ( + + {isBuildingLayout && ( + + + Building layout... + + + )} + , + LineageEdge + > + className="shrink-0" + nodes={nodes} + edges={edges} + nodeTypes={nodeTypes} + edgeTypes={edgeTypes} + nodesDraggable={false} + nodesConnectable={false} + zoomOnDoubleClick={false} + panOnScroll={true} + zoomOnScroll={true} + minZoom={nodes.length > NODES_TRESHOLD ? NODES_TRESHOLD_ZOOM : MIN_ZOOM} + maxZoom={MAX_ZOOM} + fitView={false} + nodeOrigin={[0.5, 0.5]} + onlyRenderVisibleElements + onNodeClick={onNodeClick} + onNodeDoubleClick={onNodeDoubleClick} + > + {zoom > ZOOM_THRESHOLD && ( + + )} + + {currentNode && ( + zoomToCurrentNode(DEFAULT_ZOOM)} + disabled={isBuildingLayout} + > + + + )} + {selectedNodeId && ( + <> + setShowOnlySelectedNodes(!showOnlySelectedNodes)} + disabled={isBuildingLayout} + > + + + zoomToSelectedNode(DEFAULT_ZOOM)} + disabled={isBuildingLayout} + > + + + + )} + {controls && typeof controls === 'function' + ? 
controls({ setCenter }) + : controls} + + + + ) +} diff --git a/web/common/src/components/Lineage/edge/EdgeWithGradient.tsx b/web/common/src/components/Lineage/edge/EdgeWithGradient.tsx new file mode 100644 index 0000000000..2a1da5eed1 --- /dev/null +++ b/web/common/src/components/Lineage/edge/EdgeWithGradient.tsx @@ -0,0 +1,114 @@ +import { + type Edge, + type EdgeProps, + getBezierPath, + getSmoothStepPath, + getStraightPath, +} from '@xyflow/react' +import React, { useId } from 'react' + +import { type EdgeId, type LineageEdgeData, type PathType } from '../utils' + +export interface EdgeData extends LineageEdgeData { + startColor?: string + endColor?: string + strokeWidth?: number + pathType?: PathType +} + +export const EdgeWithGradient = React.memo( + ({ + id, + sourceX, + sourceY, + targetX, + targetY, + sourcePosition, + targetPosition, + style, + data, + markerEnd, + }: EdgeProps>) => { + const edgeId = id as EdgeId + + const gradientId = useId() + const startColor = data?.startColor || 'var(--color-lineage-edge)' + const endColor = data?.endColor || 'var(--color-lineage-edge)' + const pathType = data?.pathType || 'bezier' + const strokeWidth = data?.strokeWidth || 4 + const edgePath = getEdgePath(pathType) + + function getEdgePath(pathType: PathType) { + return { + straight: getStraightPath({ + sourceX, + sourceY, + targetX, + targetY, + }), + smoothstep: getSmoothStepPath({ + sourceX, + sourceY, + sourcePosition, + targetX, + targetY, + targetPosition, + borderRadius: 10, + }), + bezier: getBezierPath({ + sourceX, + sourceY, + sourcePosition, + targetX, + targetY, + targetPosition, + }), + step: getSmoothStepPath({ + sourceX, + sourceY, + sourcePosition, + targetX, + targetY, + targetPosition, + borderRadius: 0, + }), + }[pathType] + } + + return ( + <> + + + + + + + + + ) + }, +) diff --git a/web/common/src/components/Lineage/edge/FactoryEdgeWithGradient.tsx b/web/common/src/components/Lineage/edge/FactoryEdgeWithGradient.tsx new file mode 100644 index 0000000000..a89027ffef --- /dev/null +++ b/web/common/src/components/Lineage/edge/FactoryEdgeWithGradient.tsx @@ -0,0 +1,58 @@ +import React from 'react' + +import { type LineageContextHook } from '../LineageContext' +import { + type EdgeId, + type LineageNodeData, + type NodeId, + type PortId, +} from '../utils' +import { EdgeWithGradient, type EdgeData } from './EdgeWithGradient' +import type { Edge, EdgeProps } from '@xyflow/react' + +export function FactoryEdgeWithGradient< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends EdgeData = EdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +>( + useLineage: LineageContextHook< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TPortID + >, +) { + return React.memo(({ data, id, ...props }: EdgeProps>) => { + const edgeId = id as TEdgeID + + const { selectedEdges } = useLineage() + + const isActive = selectedEdges.has(edgeId) + + let startColor = 'var(--color-lineage-edge)' + let endColor = 'var(--color-lineage-edge)' + + if (isActive && data?.startColor) { + startColor = data?.startColor + } + + if (isActive && data?.endColor) { + endColor = data?.endColor + } + + return ( + + ) + }) +} diff --git a/web/common/src/components/Lineage/help.test.ts b/web/common/src/components/Lineage/help.test.ts new file mode 100644 index 0000000000..51dcb12108 --- /dev/null +++ b/web/common/src/components/Lineage/help.test.ts @@ -0,0 +1,768 @@ +import { describe, expect, test } from 'vitest' +import { Position } 
from '@xyflow/react' + +import { + getOnlySelectedNodes, + getTransformedNodes, + getTransformedModelEdges, + getTransformedModelEdgesSourceTargets, + getTransformedModelEdgesTargetSources, + createNode, + calculateNodeBaseHeight, + calculateNodeDetailsHeight, + createEdge, +} from './help' +import type { + LineageNode, + LineageNodesMap, + LineageNodeData, + LineageDetails, + LineageAdjacencyList, + NodeId, + EdgeId, + PortId, +} from './utils' +import { toNodeID, toEdgeID } from './utils' + +describe('Lineage Help Functions', () => { + describe('getOnlySelectedNodes', () => { + test('should return only selected nodes from the node map', () => { + const nodesMap = { + node1: { + id: 'node1' as NodeId, + position: { x: 0, y: 0 }, + data: {}, + }, + node2: { + id: 'node2' as NodeId, + position: { x: 100, y: 100 }, + data: {}, + }, + node3: { + id: 'node3' as NodeId, + position: { x: 200, y: 200 }, + data: {}, + }, + } + + const selectedNodes = new Set([ + 'node1' as NodeId, + 'node3' as NodeId, + ]) + const result = getOnlySelectedNodes(nodesMap, selectedNodes) + + expect(Object.keys(result)).toHaveLength(2) + expect(result).toHaveProperty('node1') + expect(result).toHaveProperty('node3') + expect(result).not.toHaveProperty('node2') + }) + + test('should return empty object when no nodes are selected', () => { + const nodesMap = { + node1: { + id: 'node1' as NodeId, + position: { x: 0, y: 0 }, + data: {}, + }, + } + + const selectedNodes = new Set() + const result = getOnlySelectedNodes(nodesMap, selectedNodes) + + expect(Object.keys(result)).toHaveLength(0) + }) + + test('should handle empty node map', () => { + const nodesMap: LineageNodesMap = {} + const selectedNodes = new Set(['node1' as NodeId]) + const result = getOnlySelectedNodes(nodesMap, selectedNodes) + + expect(Object.keys(result)).toHaveLength(0) + }) + }) + + describe('getTransformedNodes', () => { + test('should transform nodes using the provided transform function', () => { + const adjacencyListKeys = ['model1', 'model2'] + const lineageDetails: LineageDetails< + string, + { name: string; type: string } + > = { + model1: { name: 'Model 1', type: 'table' }, + model2: { name: 'Model 2', type: 'view' }, + } + + const transformNode = ( + nodeId: NodeId, + data: { name: string; type: string }, + ) => + ({ + id: nodeId, + position: { x: 0, y: 0 }, + data: { label: data.name, nodeType: data.type }, + }) as LineageNode<{ label: string; nodeType: string }> + + const result = getTransformedNodes( + adjacencyListKeys, + lineageDetails, + transformNode, + ) + + const encodedModel1 = toNodeID('model1') + const encodedModel2 = toNodeID('model2') + + expect(Object.keys(result)).toHaveLength(2) + expect(result[encodedModel1]).toEqual({ + id: encodedModel1, + position: { x: 0, y: 0 }, + data: { label: 'Model 1', nodeType: 'table' }, + }) + expect(result[encodedModel2]).toEqual({ + id: encodedModel2, + position: { x: 0, y: 0 }, + data: { label: 'Model 2', nodeType: 'view' }, + }) + }) + + test('should handle empty adjacency list', () => { + const adjacencyListKeys: string[] = [] + const lineageDetails: LineageDetails = {} + const transformNode = (nodeId: NodeId, data: { name: string }) => + ({ + id: nodeId, + position: { x: 0, y: 0 }, + data: { label: data.name }, + }) as LineageNode<{ label: string }> + + const result = getTransformedNodes( + adjacencyListKeys, + lineageDetails, + transformNode, + ) + + expect(Object.keys(result)).toHaveLength(0) + }) + }) + + describe('getTransformedModelEdges', () => { + test('should transform edges using 
the provided transform function', () => { + const adjacencyListKeys = ['model1', 'model2', 'model3'] + const lineageAdjacencyList: LineageAdjacencyList = { + model1: ['model2', 'model3'], + model2: ['model3'], + model3: [], + } + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdges( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(3) + + const model1Id = toNodeID('model1') + const model2Id = toNodeID('model2') + const model3Id = toNodeID('model3') + + expect(result[0]).toEqual({ + id: toEdgeID('model1', 'model2'), + source: model1Id, + target: model2Id, + type: 'edge', + zIndex: 1, + }) + expect(result[1]).toEqual({ + id: toEdgeID('model1', 'model3'), + source: model1Id, + target: model3Id, + type: 'edge', + zIndex: 1, + }) + expect(result[2]).toEqual({ + id: toEdgeID('model2', 'model3'), + source: model2Id, + target: model3Id, + type: 'edge', + zIndex: 1, + }) + }) + + test('should skip edges where target is not in adjacency list', () => { + const adjacencyListKeys = ['model1'] + const lineageAdjacencyList: LineageAdjacencyList = { + model1: ['model2'], // model2 is not in the adjacency list + } + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdges( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + + test('should handle empty adjacency list', () => { + const adjacencyListKeys: string[] = [] + const lineageAdjacencyList: LineageAdjacencyList = {} + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdges( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + + test('should handle nodes with no targets', () => { + const adjacencyListKeys = ['model1', 'model2'] + const lineageAdjacencyList = { + model1: [], + model2: null, + } as unknown as LineageAdjacencyList + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdges( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + }) + + describe('getTransformedModelEdgesSourceTargets', () => { + test('should transform edges from source to targets using the provided transform function', () => { + const adjacencyListKeys = ['model1', 'model2', 'model3'] + const lineageAdjacencyList: LineageAdjacencyList = { + model1: ['model2', 'model3'], + model2: ['model3'], + model3: [], + } + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesSourceTargets( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(3) + + const model1Id = toNodeID('model1') + const model2Id = toNodeID('model2') + const model3Id = toNodeID('model3') 
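+      // Edge order is deterministic: it follows the iteration order of
+      // adjacencyListKeys and of each node's target array, so indexing
+      // result[0..2] below is safe.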
+ + expect(result[0]).toEqual({ + id: toEdgeID('model1', 'model2'), + source: model1Id, + target: model2Id, + type: 'edge', + zIndex: 1, + }) + expect(result[1]).toEqual({ + id: toEdgeID('model1', 'model3'), + source: model1Id, + target: model3Id, + type: 'edge', + zIndex: 1, + }) + expect(result[2]).toEqual({ + id: toEdgeID('model2', 'model3'), + source: model2Id, + target: model3Id, + type: 'edge', + zIndex: 1, + }) + }) + + test('should skip edges where target is not in adjacency list', () => { + const adjacencyListKeys = ['model1'] + const lineageAdjacencyList: LineageAdjacencyList = { + model1: ['model2'], // model2 is not in the adjacency list + } + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesSourceTargets( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + + test('should handle empty adjacency list', () => { + const adjacencyListKeys: string[] = [] + const lineageAdjacencyList: LineageAdjacencyList = {} + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesSourceTargets( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + + test('should handle nodes with no targets', () => { + const adjacencyListKeys = ['model1', 'model2'] + const lineageAdjacencyList = { + model1: [], + model2: null, + } as unknown as LineageAdjacencyList + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesSourceTargets( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + }) + + describe('getTransformedModelEdgesTargetSources', () => { + test('should transform edges from target to sources using the provided transform function', () => { + const adjacencyListKeys = ['model1', 'model2', 'model3'] + const lineageAdjacencyList: LineageAdjacencyList = { + model1: [], + model2: ['model1'], + model3: ['model1', 'model2'], + } + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesTargetSources( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(3) + + const model1Id = toNodeID('model1') + const model2Id = toNodeID('model2') + const model3Id = toNodeID('model3') + + expect(result[0]).toEqual({ + id: toEdgeID('model1', 'model2'), + source: model1Id, + target: model2Id, + type: 'edge', + zIndex: 1, + }) + expect(result[1]).toEqual({ + id: toEdgeID('model1', 'model3'), + source: model1Id, + target: model3Id, + type: 'edge', + zIndex: 1, + }) + expect(result[2]).toEqual({ + id: toEdgeID('model2', 'model3'), + source: model2Id, + target: model3Id, + type: 'edge', + zIndex: 1, + }) + }) + + test('should skip edges where source is not in adjacency list', () => { + const adjacencyListKeys = ['model2'] + const lineageAdjacencyList: LineageAdjacencyList = { + model2: ['model1'], // model1 is not in the 
adjacency list + } + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesTargetSources( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + + test('should handle empty adjacency list', () => { + const adjacencyListKeys: string[] = [] + const lineageAdjacencyList: LineageAdjacencyList = {} + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesTargetSources( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + + test('should handle nodes with no sources', () => { + const adjacencyListKeys = ['model1', 'model2'] + const lineageAdjacencyList = { + model1: [], + model2: null, + } as unknown as LineageAdjacencyList + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesTargetSources( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + }) + + describe('createNode', () => { + test('should create a node with provided data', () => { + const nodeId = 'test-node' as NodeId + const data = { label: 'Test Node', value: 42 } + const node = createNode('custom', nodeId, data) + + expect(node).toEqual({ + id: nodeId, + sourcePosition: Position.Right, + targetPosition: Position.Left, + width: 300, // DEFAULT_NODE_WIDTH + height: 32, // DEFAULT_NODE_HEIGHT + data, + type: 'custom', + hidden: false, + position: { x: 0, y: 0 }, + zIndex: 10, + }) + }) + + test('should create a node with minimal data', () => { + const nodeId = 'minimal' as NodeId + const data = {} + const node = createNode('default', nodeId, data) + + expect(node.id).toBe(nodeId) + expect(node.type).toBe('default') + expect(node.data).toEqual({}) + expect(node.hidden).toBe(false) + }) + }) + + describe('calculateNodeBaseHeight', () => { + test('should calculate base height with no additional components', () => { + const height = calculateNodeBaseHeight({}) + // border (2*2) + base (28) = 32 + expect(height).toBe(32) + }) + + test('should include footer height when specified', () => { + const height = calculateNodeBaseHeight({ includeNodeFooterHeight: true }) + // border (2*2) + base (28) + footer (20) = 52 + expect(height).toBe(52) + }) + + test('should include ceiling height when specified', () => { + const height = calculateNodeBaseHeight({ includeCeilingHeight: true }) + // border (2*2) + base (28) + ceiling (20) + ceilingGap (4) = 56 + expect(height).toBe(56) + }) + + test('should include floor height when specified', () => { + const height = calculateNodeBaseHeight({ includeFloorHeight: true }) + // border (2*2) + base (28) + floor (20) + floorGap (4) = 56 + expect(height).toBe(56) + }) + + test('should include all components when specified', () => { + const height = calculateNodeBaseHeight({ + includeNodeFooterHeight: true, + includeCeilingHeight: true, + includeFloorHeight: true, + }) + // border (2*2) + base (28) + footer (20) + ceiling (20) + ceilingGap (4) + floor (20) + floorGap (4) = 100 + expect(height).toBe(100) + }) + }) + + 
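+  // Hypothetical composition (a sketch, not part of the original suite): the
+  // base and details helpers are additive, so a node's total height can be
+  // estimated by summing them. Constants follow the breakdowns asserted above.
+  describe('composed node height (illustrative)', () => {
+    test('base height plus details height adds up', () => {
+      const base = calculateNodeBaseHeight({ includeNodeFooterHeight: true })
+      const details = calculateNodeDetailsHeight({ nodeDetailsCount: 2 })
+      // base: border (2*2) + base (28) + footer (20) = 52
+      // details: 2 * 24 (nodeOptionHeight) + 1 separator = 49
+      expect(base + details).toBe(101)
+    })
+  })
+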
describe('calculateNodeDetailsHeight', () => { + test('should return 0 when no details', () => { + const height = calculateNodeDetailsHeight({}) + expect(height).toBe(0) + }) + + test('should calculate height for single detail', () => { + const height = calculateNodeDetailsHeight({ nodeDetailsCount: 1 }) + // 1 * 24 (nodeOptionHeight) = 24 + expect(height).toBe(24) + }) + + test('should calculate height for multiple details with separators', () => { + const height = calculateNodeDetailsHeight({ nodeDetailsCount: 3 }) + // 3 * 24 (nodeOptionHeight) + 2 * 1 (separators between items) = 74 + expect(height).toBe(74) + }) + + test('should handle zero details count', () => { + const height = calculateNodeDetailsHeight({ nodeDetailsCount: 0 }) + expect(height).toBe(0) + }) + }) + + describe('createEdge', () => { + test('should create edge with basic parameters', () => { + const edgeId = 'edge1' as EdgeId + const sourceId = 'source1' as NodeId + const targetId = 'target1' as NodeId + + const edge = createEdge('straight', edgeId, sourceId, targetId) + + expect(edge).toEqual({ + id: edgeId, + source: sourceId, + target: targetId, + type: 'straight', + sourceHandle: undefined, + targetHandle: undefined, + data: undefined, + zIndex: 1, + }) + }) + + test('should create edge with handles', () => { + const edgeId = 'edge2' as EdgeId + const sourceId = 'source2' as NodeId + const targetId = 'target2' as NodeId + const sourceHandleId = 'handle1' as PortId + const targetHandleId = 'handle2' as PortId + + const edge = createEdge( + 'bezier', + edgeId, + sourceId, + targetId, + sourceHandleId, + targetHandleId, + ) + + expect(edge).toEqual({ + id: edgeId, + source: sourceId, + target: targetId, + type: 'bezier', + sourceHandle: sourceHandleId, + targetHandle: targetHandleId, + data: undefined, + zIndex: 1, + }) + }) + + test('should create edge with data', () => { + const edgeId = 'edge3' as EdgeId + const sourceId = 'source3' as NodeId + const targetId = 'target3' as NodeId + const data = { label: 'Connection', weight: 5 } + + const edge = createEdge( + 'smoothstep', + edgeId, + sourceId, + targetId, + undefined, + undefined, + data, + ) + + expect(edge).toEqual({ + id: edgeId, + source: sourceId, + target: targetId, + type: 'smoothstep', + sourceHandle: undefined, + targetHandle: undefined, + data, + zIndex: 1, + }) + }) + + test('should create edge with all parameters', () => { + const edgeId = 'edge4' as EdgeId + const sourceId = 'source4' as NodeId + const targetId = 'target4' as NodeId + const sourceHandleId = 'handle3' as PortId + const targetHandleId = 'handle4' as PortId + const data = { animated: true } + + const edge = createEdge( + 'step', + edgeId, + sourceId, + targetId, + sourceHandleId, + targetHandleId, + data, + ) + + expect(edge).toEqual({ + id: edgeId, + source: sourceId, + target: targetId, + type: 'step', + sourceHandle: sourceHandleId, + targetHandle: targetHandleId, + data, + zIndex: 1, + }) + }) + }) +}) diff --git a/web/common/src/components/Lineage/help.ts b/web/common/src/components/Lineage/help.ts new file mode 100644 index 0000000000..a052ff707b --- /dev/null +++ b/web/common/src/components/Lineage/help.ts @@ -0,0 +1,270 @@ +import { Position } from '@xyflow/react' + +import { + DEFAULT_NODE_HEIGHT, + DEFAULT_NODE_WIDTH, + type EdgeId, + type LineageAdjacencyList, + type LineageDetails, + type LineageEdge, + type LineageEdgeData, + type LineageNode, + type LineageNodeData, + type LineageNodesMap, + type NodeId, + type PortId, + toEdgeID, + toNodeID, + type TransformEdgeFn, + 
type TransformNodeFn, +} from './utils' + +export function getOnlySelectedNodes< + TNodeData extends LineageNodeData = LineageNodeData, + TNodeID extends string = NodeId, +>(nodeMaps: LineageNodesMap, selectedNodes: Set) { + return (Object.values(nodeMaps) as LineageNode[]).reduce( + (acc, node) => + selectedNodes.has(node.id) ? { ...acc, [node.id]: node } : acc, + {} as LineageNodesMap, + ) +} + +export function getTransformedNodes< + TAdjacencyListKey extends string, + TDetailsNode, + TNodeData extends LineageNodeData = LineageNodeData, + TNodeID extends string = NodeId, +>( + adjacencyListKeys: TAdjacencyListKey[], + lineageDetails: LineageDetails, + transformNode: TransformNodeFn, +): LineageNodesMap { + const nodesCount = adjacencyListKeys.length + const nodesMap: LineageNodesMap = Object.create(null) + + for (let i = 0; i < nodesCount; i++) { + const adjacencyListKey = adjacencyListKeys[i] + const encodedNodeId = toNodeID(adjacencyListKey) + nodesMap[encodedNodeId] = transformNode( + encodedNodeId, + lineageDetails[adjacencyListKey], + ) + } + + return nodesMap +} + +export function getTransformedModelEdges< + TAdjacencyListKey extends string, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +>( + adjacencyListKeys: TAdjacencyListKey[], + lineageAdjacencyList: LineageAdjacencyList, + transformEdge: TransformEdgeFn, +) { + const nodesCount = adjacencyListKeys.length + + if (nodesCount === 0) return [] + + const edges = [] + + for (let i = 0; i < nodesCount; i++) { + const adjacencyListKey = adjacencyListKeys[i] + const nodeId = toNodeID(adjacencyListKey) + const targets = lineageAdjacencyList[adjacencyListKey] + const targetsCount = targets?.length || 0 + + if (targets == null || targetsCount < 1) continue + + for (let j = 0; j < targetsCount; j++) { + const target = targets[j] + + if (!(target in lineageAdjacencyList)) continue + + const edgeId = toEdgeID(adjacencyListKey, target) + + edges.push( + transformEdge('edge', edgeId, nodeId, toNodeID(target)), + ) + } + } + + return edges +} + +export function getTransformedModelEdgesSourceTargets< + TAdjacencyListKey extends string, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +>( + adjacencyListKeys: TAdjacencyListKey[], + lineageAdjacencyList: LineageAdjacencyList, + transformEdge: TransformEdgeFn, +) { + const nodesCount = adjacencyListKeys.length + + if (nodesCount === 0) return [] + + const edges = [] + + for (let i = 0; i < nodesCount; i++) { + const sourceAdjacencyListKey = adjacencyListKeys[i] + const sourceNodeId = toNodeID(sourceAdjacencyListKey) + const targets = lineageAdjacencyList[sourceAdjacencyListKey] + const targetsCount = targets?.length || 0 + + if (targets == null || targetsCount < 1) continue + + for (let j = 0; j < targetsCount; j++) { + const targetAdjacencyListKey = targets[j] + + if (!(targetAdjacencyListKey in lineageAdjacencyList)) continue + + const edgeId = toEdgeID( + sourceAdjacencyListKey, + targetAdjacencyListKey, + ) + const targetNodeId = toNodeID(targetAdjacencyListKey) + + edges.push(transformEdge('edge', edgeId, sourceNodeId, targetNodeId)) + } + } + + return edges +} + +export function getTransformedModelEdgesTargetSources< + TAdjacencyListKey extends string, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = 
EdgeId, + TPortID extends string = PortId, +>( + adjacencyListKeys: TAdjacencyListKey[], + lineageAdjacencyList: LineageAdjacencyList, + transformEdge: TransformEdgeFn, +) { + const nodesCount = adjacencyListKeys.length + + if (nodesCount === 0) return [] + + const edges = [] + + for (let i = 0; i < nodesCount; i++) { + const targetAdjacencyListKey = adjacencyListKeys[i] + const targetNodeId = toNodeID(targetAdjacencyListKey) + const sources = lineageAdjacencyList[targetAdjacencyListKey] + const sourcesCount = sources?.length || 0 + + if (sources == null || sourcesCount < 1) continue + + for (let j = 0; j < sourcesCount; j++) { + const sourceAdjacencyListKey = sources[j] + + if (!(sourceAdjacencyListKey in lineageAdjacencyList)) continue + + const edgeId = toEdgeID( + sourceAdjacencyListKey, + targetAdjacencyListKey, + ) + const sourceNodeId = toNodeID(sourceAdjacencyListKey) + + edges.push(transformEdge('edge', edgeId, sourceNodeId, targetNodeId)) + } + } + + return edges +} + +export function createNode< + TNodeData extends LineageNodeData = LineageNodeData, + TNodeID extends string = NodeId, +>(type: string, nodeId: TNodeID, data: TNodeData) { + return { + id: nodeId, + sourcePosition: Position.Right, + targetPosition: Position.Left, + width: DEFAULT_NODE_WIDTH, + height: DEFAULT_NODE_HEIGHT, + data, + type, + hidden: false, + position: { x: 0, y: 0 }, + zIndex: 10, + } +} + +export function calculateNodeBaseHeight({ + includeNodeFooterHeight = false, + includeCeilingHeight = false, + includeFloorHeight = false, +}: { + includeNodeFooterHeight?: boolean + includeCeilingHeight?: boolean + includeFloorHeight?: boolean +}) { + const border = 2 + const footerHeight = 20 // tailwind h-5 + const base = 28 // tailwind h-7 + const ceilingHeight = 20 // tailwind h-5 + const floorHeight = 20 // tailwind h-5 + + const ceilingGap = 4 + const floorGap = 4 + + return [ + border * 2, + base, + includeNodeFooterHeight ? footerHeight : 0, + includeCeilingHeight ? ceilingHeight + ceilingGap : 0, + includeFloorHeight ? floorHeight + floorGap : 0, + ].reduce((acc, h) => acc + h, 0) +} + +export function calculateNodeDetailsHeight({ + nodeDetailsCount = 0, +}: { + nodeDetailsCount?: number +}) { + const nodeOptionHeight = 24 // tailwind h-6 + + const nodeOptionsSeparator = 1 + const nodeOptionsSeparators = nodeDetailsCount > 1 ? nodeDetailsCount - 1 : 0 + + return [ + nodeOptionsSeparators * nodeOptionsSeparator, + nodeDetailsCount * nodeOptionHeight, + ].reduce((acc, h) => acc + h, 0) +} + +export function createEdge< + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +>( + type: string, + edgeId: TEdgeID, + sourceId: TNodeID, + targetId: TNodeID, + sourceHandleId?: TPortID, + targetHandleId?: TPortID, + data?: TEdgeData, +): LineageEdge { + return { + id: edgeId, + source: sourceId, + target: targetId, + type, + sourceHandle: sourceHandleId ? sourceHandleId : undefined, + targetHandle: targetHandleId ? 
targetHandleId : undefined,
+    data,
+    zIndex: 1,
+  }
+}
diff --git a/web/common/src/components/Lineage/index.ts b/web/common/src/components/Lineage/index.ts
new file mode 100644
index 0000000000..0fbc17047c
--- /dev/null
+++ b/web/common/src/components/Lineage/index.ts
@@ -0,0 +1,28 @@
+export * from './utils'
+export * from './LineageLayout'
+export * from './LineageContext'
+export * from './LineageControlButton'
+export * from './LineageControlIcon'
+export * from './help'
+export * from './node/base-handle'
+export * from './node/base-node'
+export * from './node/NodeContainer'
+export * from './node/NodeBase'
+export * from './node/NodeDivider'
+export * from './node/NodeHandleIcon'
+export * from './node/NodeHandles'
+export * from './node/NodeHandle'
+export * from './node/NodeHeader'
+export * from './node/NodePorts'
+export * from './node/NodePort'
+export * from './node/NodeAppendix'
+export * from './node/NodeBadge'
+export * from './node/useNodeMetadata'
+export * from './edge/EdgeWithGradient'
+export * from './edge/FactoryEdgeWithGradient'
+export * from './layout/dagreLayout'
+export * from './LineageColumnLevel/ColumnLevelLineageContext'
+export * from './LineageColumnLevel/FactoryColumn'
+export * from './LineageColumnLevel/useColumns'
+export * from './LineageColumnLevel/useColumnLevelLineage'
+export * from './LineageColumnLevel/help'
diff --git a/web/common/src/components/Lineage/layout/dagreLayout.ts b/web/common/src/components/Lineage/layout/dagreLayout.ts
new file mode 100644
index 0000000000..83714a2220
--- /dev/null
+++ b/web/common/src/components/Lineage/layout/dagreLayout.ts
@@ -0,0 +1,90 @@
+import {
+  DEFAULT_NODE_WIDTH,
+  type EdgeId,
+  type LineageEdge,
+  type LineageEdgeData,
+  type LineageNodeData,
+  type LineageNodesMap,
+  type NodeId,
+  type PortId,
+} from '../utils'
+import dagre from 'dagre'
+
+export function buildLayout<
+  TNodeData extends LineageNodeData = LineageNodeData,
+  TEdgeData extends LineageEdgeData = LineageEdgeData,
+  TNodeID extends string = NodeId,
+  TEdgeID extends string = EdgeId,
+  TPortID extends string = PortId,
+>({
+  edges,
+  nodesMap,
+}: {
+  edges: LineageEdge<TEdgeData, TNodeID, TEdgeID, TPortID>[]
+  nodesMap: LineageNodesMap<TNodeData, TNodeID>
+}) {
+  const nodes = Object.values(nodesMap)
+  const nodeCount = nodes.length
+  const edgeCount = edges.length
+
+  if (nodeCount === 0)
+    return {
+      edges: [],
+      nodesMap: {},
+    }
+
+  const g = new dagre.graphlib.Graph({
+    compound: true,
+    multigraph: true,
+    directed: true,
+  })
+
+  g.setGraph({
+    rankdir: 'LR',
+    nodesep: 0,
+    ranksep: 48,
+    edgesep: 0,
+    ranker: 'longest-path',
+  })
+
+  g.setDefaultEdgeLabel(() => ({}))
+
+  // Building the layout is already a heavy operation, so these loops are kept as cheap as possible
+  for (let i = 0; i < edgeCount; i++) {
+    g.setEdge(edges[i].source, edges[i].target)
+  }
+
+  for (let i = 0; i < nodeCount; i++) {
+    const node = nodes[i]
+    g.setNode(node.id, {
+      width: node.width || DEFAULT_NODE_WIDTH,
+      height: node.height || 0,
+    })
+  }
+
+  dagre.layout(g)
+
+  // Same reasoning: a single indexed pass converts dagre's centered coordinates to top-left positions
+  for (let i = 0; i < nodeCount; i++) {
+    const node = nodes[i]
+    const width = node.width || DEFAULT_NODE_WIDTH
+    const height = node.height || 0
+    const nodeId = node.id as NodeId
+    const nodeWithPosition = g.node(nodeId)
+    const halfWidth = width / 2
+    const halfHeight = height / 2
+
+    nodesMap[nodeId] = {
+      ...node,
+      position: {
+        x: nodeWithPosition.x - halfWidth,
+        y: nodeWithPosition.y - halfHeight,
+      },
+    }
+  }
+
+  return {
+    edges,
+    nodesMap,
+  }
+}
diff --git
a/web/common/src/components/Lineage/layout/help.ts b/web/common/src/components/Lineage/layout/help.ts new file mode 100644 index 0000000000..91b3ebc4a3 --- /dev/null +++ b/web/common/src/components/Lineage/layout/help.ts @@ -0,0 +1,100 @@ +import { + type LineageEdge, + type LineageEdgeData, + type LineageNodeData, + type LineageNodesMap, + type NodeId, + type PortId, + type LayoutedGraph, + type EdgeId, +} from '../utils' + +const DEFAULT_TIMEOUT = 1000 * 60 // 1 minute + +let workerInstance: Worker | null = null + +export function getWorker(url: URL): Worker { + if (workerInstance) return workerInstance + + workerInstance = new Worker(url, { type: 'module' }) + + return workerInstance +} + +export async function getLayoutedGraph< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +>( + edges: LineageEdge[], + nodesMap: LineageNodesMap, + workerUrl: URL, +): Promise> { + let timeoutId: NodeJS.Timeout | null = null + + return new Promise((resolve, reject) => { + const nodes = Object.values(nodesMap) + + if (nodes.length === 0) return resolve({ edges: [], nodesMap: {} }) + + const worker = getWorker(workerUrl) + + if (worker == null) + return errorHandler(new ErrorEvent('Failed to create worker')) + + timeoutId = setTimeout( + () => errorHandler(new ErrorEvent('Layout calculation timed out')), + DEFAULT_TIMEOUT, + ) + + worker.addEventListener('message', handler) + worker.addEventListener('error', errorHandler) + + try { + worker.postMessage({ edges, nodesMap } as LayoutedGraph< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TPortID + >) + } catch (postError) { + errorHandler(postError as ErrorEvent) + } + + function handler( + event: MessageEvent< + LayoutedGraph & { + error: ErrorEvent + } + >, + ) { + cleanup() + + if (event.data.error) return errorHandler(event.data.error) + + resolve(event.data) + } + + function errorHandler(error: ErrorEvent) { + cleanup() + reject(error) + } + + function cleanup() { + if (timeoutId) { + clearTimeout(timeoutId) + timeoutId = null + } + worker?.removeEventListener('message', handler) + worker?.removeEventListener('error', errorHandler) + } + }) +} + +export function cleanupLayoutWorker(): void { + workerInstance?.terminate() + workerInstance = null +} diff --git a/web/common/src/components/Lineage/node/NodeAppendix.tsx b/web/common/src/components/Lineage/node/NodeAppendix.tsx new file mode 100644 index 0000000000..76d64affed --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeAppendix.tsx @@ -0,0 +1,44 @@ +import { cva, type VariantProps } from 'class-variance-authority' +import { forwardRef, type HTMLAttributes } from 'react' + +import { cn } from '@/utils' + +const appendixVariants = cva( + 'node-appendix absolute flex w-full flex-col items-center', + { + variants: { + position: { + top: '-translate-y-[100%] -my-1', + bottom: 'top-[100%] my-1', + left: '-left-[100%] -mx-1', + right: 'left-[100%] mx-1', + }, + }, + defaultVariants: { + position: 'top', + }, + }, +) + +export interface NodeAppendixProps + extends HTMLAttributes, + VariantProps { + className?: string + position?: 'top' | 'bottom' | 'left' | 'right' +} + +export const NodeAppendix = forwardRef( + ({ children, className, position, ...props }, ref) => { + return ( +
<div
+        ref={ref}
+        className={cn(appendixVariants({ position }), className)}
+        {...props}
+      >
+        {children}
+      </div>
+ ) + }, +) + +NodeAppendix.displayName = 'NodeAppendix' diff --git a/web/common/src/components/Lineage/node/NodeBadge.tsx b/web/common/src/components/Lineage/node/NodeBadge.tsx new file mode 100644 index 0000000000..943e5e9267 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeBadge.tsx @@ -0,0 +1,23 @@ +import React from 'react' + +import { cn } from '@/utils' +import { Badge, type BadgeProps } from '@/components/Badge/Badge' + +export const NodeBadge = React.forwardRef( + ({ className, children, ...props }, ref) => { + return ( + + {children} + + ) + }, +) +NodeBadge.displayName = 'NodeBadge' diff --git a/web/common/src/components/Lineage/node/NodeBase.tsx b/web/common/src/components/Lineage/node/NodeBase.tsx new file mode 100644 index 0000000000..78033a4099 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeBase.tsx @@ -0,0 +1,31 @@ +import { type NodeProps } from '@xyflow/react' +import React from 'react' + +import { BaseNode } from '@/components/Lineage/node/base-node' +import { cn } from '@/utils' + +export interface NodeBaseProps extends NodeProps { + className?: string + children?: React.ReactNode +} + +export const NodeBase = React.memo( + React.forwardRef( + ({ className, children }, ref) => { + return ( + + {children} + + ) + }, + ), +) +NodeBase.displayName = 'NodeBase' diff --git a/web/common/src/components/Lineage/node/NodeContainer.tsx b/web/common/src/components/Lineage/node/NodeContainer.tsx new file mode 100644 index 0000000000..0506771eae --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeContainer.tsx @@ -0,0 +1,21 @@ +import React from 'react' + +import { cn } from '@/utils' +import { VerticalContainer } from '@/components/VerticalContainer/VerticalContainer' + +export const NodeContainer = React.forwardRef< + HTMLDivElement, + React.HTMLAttributes +>(({ className, children, ...props }, ref) => { + return ( + + {children} + + ) +}) +NodeContainer.displayName = 'NodeContainer' diff --git a/web/common/src/components/Lineage/node/NodeDetail.tsx b/web/common/src/components/Lineage/node/NodeDetail.tsx new file mode 100644 index 0000000000..96b8cafbb8 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeDetail.tsx @@ -0,0 +1,26 @@ +import { Metadata, cn } from '@tobikodata/sqlmesh-common' + +import { NodeDivider } from './NodeDivider' + +export function NodeDetail({ + label, + value, + hasDivider = true, + className, +}: { + label: string + value: string + hasDivider?: boolean + className?: string +}) { + return ( + <> + {hasDivider && } + + + ) +} diff --git a/web/common/src/components/Lineage/node/NodeDivider.tsx b/web/common/src/components/Lineage/node/NodeDivider.tsx new file mode 100644 index 0000000000..5f35f0c7e6 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeDivider.tsx @@ -0,0 +1,3 @@ +export function NodeDivider() { + return
+} diff --git a/web/common/src/components/Lineage/node/NodeHandle.tsx b/web/common/src/components/Lineage/node/NodeHandle.tsx new file mode 100644 index 0000000000..e737ff4327 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeHandle.tsx @@ -0,0 +1,31 @@ +import { Position } from '@xyflow/react' +import React from 'react' + +import { cn } from '@/utils' +import { BaseHandle } from './base-handle' + +export const NodeHandle = React.memo(function NodeHandle({ + type, + id, + children, + className, + ...props +}: { + type: 'target' | 'source' + id: string + children: React.ReactNode + className?: string +}) { + return ( + + {children} + + ) +}) diff --git a/web/common/src/components/Lineage/node/NodeHandleIcon.tsx b/web/common/src/components/Lineage/node/NodeHandleIcon.tsx new file mode 100644 index 0000000000..d7335a69b3 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeHandleIcon.tsx @@ -0,0 +1,22 @@ +import { ArrowRight } from 'lucide-react' + +import { cn } from '@/utils' + +export function NodeHandleIcon({ + className, + iconSize = 20, +}: { + className?: string + iconSize?: number +}) { + return ( + + ) +} diff --git a/web/common/src/components/Lineage/node/NodeHandles.tsx b/web/common/src/components/Lineage/node/NodeHandles.tsx new file mode 100644 index 0000000000..71bee716b4 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeHandles.tsx @@ -0,0 +1,50 @@ +import React from 'react' + +import { cn } from '@/utils' +import { HorizontalContainer } from '@/components/HorizontalContainer/HorizontalContainer' +import { NodeHandle } from './NodeHandle' + +export const NodeHandles = React.memo(function NodeHandles({ + leftIcon, + rightIcon, + leftId, + rightId, + className, + handleClassName, + children, +}: { + leftId?: string + rightId?: string + className?: string + handleClassName?: string + children: React.ReactNode + leftIcon: React.ReactNode + rightIcon: React.ReactNode +}) { + return ( + + {leftId && ( + + {leftIcon} + + )} + {children} + {rightId && ( + + {rightIcon} + + )} + + ) +}) diff --git a/web/common/src/components/Lineage/node/NodeHeader.tsx b/web/common/src/components/Lineage/node/NodeHeader.tsx new file mode 100644 index 0000000000..334af2c5ed --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeHeader.tsx @@ -0,0 +1,28 @@ +import { type HTMLAttributes, forwardRef } from 'react' + +import { cn } from '@/utils' + +/* NODE HEADER -------------------------------------------------------------- */ + +export type NodeHeaderProps = HTMLAttributes + +/** + * A container for a consistent header layout intended to be used inside the + * `` component. + */ +export const NodeHeader = forwardRef( + ({ className, ...props }, ref) => { + return ( +
+ ) + }, +) + +NodeHeader.displayName = 'NodeHeader' diff --git a/web/common/src/components/Lineage/node/NodePort.tsx b/web/common/src/components/Lineage/node/NodePort.tsx new file mode 100644 index 0000000000..ecf6206382 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodePort.tsx @@ -0,0 +1,64 @@ +import { useNodeConnections, useUpdateNodeInternals } from '@xyflow/react' +import React from 'react' + +import { cn } from '@/utils' +import { type NodeId, type PortId } from '../utils' +import { NodeHandles } from './NodeHandles' + +export const NodePort = React.memo(function NodePort< + TPortId extends string = PortId, + TNodeID extends string = NodeId, +>({ + id, + nodeId, + className, + children, +}: { + id: TPortId + nodeId: TNodeID + className?: string + children: React.ReactNode +}) { + const updateNodeInternals = useUpdateNodeInternals() + + const sources = useNodeConnections({ + id: nodeId, + handleType: 'source', + handleId: id, + }) + const targets = useNodeConnections({ + id: nodeId, + handleType: 'target', + handleId: id, + }) + + const leftId = targets.length > 0 ? id : undefined + const rightId = sources.length > 0 ? id : undefined + + React.useEffect(() => { + if (leftId || rightId) { + updateNodeInternals(nodeId) + } + }, [updateNodeInternals, nodeId, leftId, rightId]) + + return ( + + } + rightIcon={ + + } + leftId={leftId} + rightId={rightId} + className={cn( + 'relative overflow-visible group p-0 bg-lineage-node-port-background h-auto', + className, + )} + handleClassName="absolute" + > + {children} + + ) +}) diff --git a/web/common/src/components/Lineage/node/NodePorts.tsx b/web/common/src/components/Lineage/node/NodePorts.tsx new file mode 100644 index 0000000000..f417dea9e4 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodePorts.tsx @@ -0,0 +1,44 @@ +import { cn } from '@/utils' +import { VirtualList } from '@/components/VirtualList/VirtualList' +import { FilterableList } from '@/components/VirtualList/FilterableList' +import type { IFuseOptions } from 'fuse.js' + +export function NodePorts({ + ports, + estimatedListItemHeight, + renderPort, + className, + isFilterable = true, + filterOptions, +}: { + ports: TPort[] + estimatedListItemHeight: number + renderPort: (port: TPort) => React.ReactNode + className?: string + isFilterable?: boolean + filterOptions?: IFuseOptions +}) { + function renderVirtualList(items: TPort[]) { + return ( + renderPort(item)} + className={cn(!isFilterable && className)} + /> + ) + } + return isFilterable ? 
( + + {renderVirtualList} + + ) : ( + renderVirtualList(ports) + ) +} diff --git a/web/common/src/components/Lineage/node/base-handle.tsx b/web/common/src/components/Lineage/node/base-handle.tsx new file mode 100644 index 0000000000..76d66bdeaf --- /dev/null +++ b/web/common/src/components/Lineage/node/base-handle.tsx @@ -0,0 +1,27 @@ +import { Handle, type HandleProps } from '@xyflow/react' +import { forwardRef } from 'react' +import type { ForwardRefExoticComponent, RefAttributes } from 'react' + +import { cn } from '@/utils' + +export const BaseHandle: ForwardRefExoticComponent< + HandleProps & RefAttributes +> = forwardRef( + ({ className, children, ...props }, ref) => { + return ( + + {children} + + ) + }, +) + +BaseHandle.displayName = 'BaseHandle' diff --git a/web/common/src/components/Lineage/node/base-node.tsx b/web/common/src/components/Lineage/node/base-node.tsx new file mode 100644 index 0000000000..d349ca601a --- /dev/null +++ b/web/common/src/components/Lineage/node/base-node.tsx @@ -0,0 +1,17 @@ +import { type HTMLAttributes, forwardRef } from 'react' + +import { cn } from '@/utils' + +export const BaseNode = forwardRef< + HTMLDivElement, + HTMLAttributes & { selected?: boolean } +>(({ className, ...props }, ref) => ( +
+)) + +BaseNode.displayName = 'BaseNode' diff --git a/web/common/src/components/Lineage/node/useNodeMetadata.tsx b/web/common/src/components/Lineage/node/useNodeMetadata.tsx new file mode 100644 index 0000000000..3601b752fd --- /dev/null +++ b/web/common/src/components/Lineage/node/useNodeMetadata.tsx @@ -0,0 +1,43 @@ +import { + type Node, + type NodeProps as ReactFlowNodeProps, + useNodeConnections, +} from '@xyflow/react' + +import { type LineageNode, type LineageNodeData, type NodeId } from '../utils' + +export type NodeProps = + ReactFlowNodeProps> + +export function useNodeMetadata< + TNodeData extends LineageNodeData = LineageNodeData, + TNodeID extends string = NodeId, +>( + nodeId: TNodeID, + currentNode: LineageNode | null, + selectedNodeId: TNodeID | null, + selectedNodes: Set, +) { + const sources = useNodeConnections({ + id: nodeId, + handleType: 'source', + }) + const targets = useNodeConnections({ + id: nodeId, + handleType: 'target', + }) + + const leftId = targets.length > 0 ? nodeId : undefined + const rightId = sources.length > 0 ? nodeId : undefined + const isCurrent = currentNode?.id === nodeId + const isSelected = selectedNodeId === nodeId + const isActive = selectedNodes.has(nodeId) + + return { + leftId, + rightId, + isCurrent, + isSelected, + isActive, + } +} diff --git a/web/common/src/components/Lineage/stories/Lineage.stories.tsx b/web/common/src/components/Lineage/stories/Lineage.stories.tsx new file mode 100644 index 0000000000..4ad8ca9f8b --- /dev/null +++ b/web/common/src/components/Lineage/stories/Lineage.stories.tsx @@ -0,0 +1,192 @@ +import type { LineageAdjacencyList, LineageDetails } from '../utils' + +import { ModelLineage } from './ModelLineage' +import type { ModelLineageNodeDetails, ModelName } from './ModelLineageContext' + +export default { + title: 'Components/Lineage', +} + +export const LineageModel = () => { + return ( +
+ + + } + lineageDetails={ + { + 'sqlmesh.sushi.raw_orders': { + name: 'sqlmesh.sushi.raw_orders', + display_name: 'sushi.raw_orders', + identifier: '123456789', + version: '123456789', + dialect: 'bigquery', + cron: '0 0 * * *', + owner: 'admin', + kind: 'INCREMENTAL_BY_TIME', + model_type: 'python', + tags: ['test', 'tag', 'another tag'], + columns: { + user_id: { + data_type: 'STRING', + description: 'node', + }, + event_id: { + data_type: 'STRING', + description: 'node', + }, + created_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + }, + }, + 'sqlmesh.sushi.orders': { + name: 'sqlmesh.sushi.orders', + display_name: 'sushi.orders', + identifier: '123456789', + version: '123456789', + dialect: 'bigquery', + cron: '0 0 * * *', + owner: 'admin', + kind: 'INCREMENTAL_BY_TIME', + model_type: 'sql', + tags: ['test', 'tag', 'another tag'], + columns: { + user_id: { + data_type: 'STRING', + description: 'node', + columnLineageData: { + 'sqlmesh.sushi.orders': { + user_id: { + source: 'sqlmesh.sushi.raw_orders', + expression: + 'select user_id from sqlmesh.sushi.raw_orders', + models: { + 'sqlmesh.sushi.raw_orders': ['user_id'], + }, + }, + }, + }, + }, + event_id: { + data_type: 'STRING', + description: 'node', + columnLineageData: { + 'sqlmesh.sushi.orders': { + event_id: { + models: { + 'sqlmesh.sushi.raw_orders': ['event_id'], + }, + }, + }, + }, + }, + product_id: { + data_type: 'STRING', + description: 'node', + }, + customer_id: { + data_type: 'STRING', + description: 'node', + }, + updated_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + deleted_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + expired_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + start_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + end_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + created_ts: { + data_type: 'TIMESTAMP', + description: 'node', + }, + }, + }, + } as LineageDetails + } + className="rounded-2xl" + /> +
+ ) +} diff --git a/web/common/src/components/Lineage/stories/ModelLineage.tsx b/web/common/src/components/Lineage/stories/ModelLineage.tsx new file mode 100644 index 0000000000..d704b6a209 --- /dev/null +++ b/web/common/src/components/Lineage/stories/ModelLineage.tsx @@ -0,0 +1,416 @@ +import { debounce } from 'lodash' +import { Focus, Rows2, Rows3 } from 'lucide-react' +import React from 'react' + +import { type ColumnLevelLineageAdjacencyList } from '../LineageColumnLevel/ColumnLevelLineageContext' +import { + MAX_COLUMNS_TO_DISPLAY, + calculateColumnsHeight, + calculateNodeColumnsCount, + calculateSelectedColumnsHeight, + getEdgesFromColumnLineage, +} from '../LineageColumnLevel/help' +import { useColumnLevelLineage } from '../LineageColumnLevel/useColumnLevelLineage' +import { LineageControlButton } from '../LineageControlButton' +import { LineageControlIcon } from '../LineageControlIcon' +import { LineageLayout } from '../LineageLayout' +import { FactoryEdgeWithGradient } from '../edge/FactoryEdgeWithGradient' +import { + toNodeID, + toPortID, + type LineageAdjacencyList, + type LineageDetails, +} from '../utils' +import { + calculateNodeBaseHeight, + calculateNodeDetailsHeight, + createEdge, + createNode, + getOnlySelectedNodes, + getTransformedModelEdgesSourceTargets, + getTransformedNodes, +} from '../help' +import { + type LineageEdge, + type LineageNodesMap, + ZOOM_THRESHOLD, +} from '../utils' +import { + type EdgeData, + ModelLineageContext, + type ModelLineageNodeDetails, + type ModelName, + type ColumnName, + type NodeData, + useModelLineage, + type ModelNodeId, + type ModelColumnID, + type ModelEdgeId, + type NodeType, +} from './ModelLineageContext' +import { ModelNode } from './ModelNode' +import { getNodeTypeColorVar } from './help' +import { EdgeWithGradient } from '../edge/EdgeWithGradient' +import { cleanupLayoutWorker, getLayoutedGraph } from '../layout/help' + +const nodeTypes = { + node: ModelNode, +} +const edgeTypes = { + edge: FactoryEdgeWithGradient(useModelLineage), + port: EdgeWithGradient, +} + +export const ModelLineage = ({ + selectedModelName, + adjacencyList, + lineageDetails, + className, +}: { + adjacencyList: LineageAdjacencyList + lineageDetails: LineageDetails + selectedModelName?: ModelName + className?: string +}) => { + const [zoom, setZoom] = React.useState(ZOOM_THRESHOLD) + const [isBuildingLayout, setIsBuildingLayout] = React.useState(false) + const [edges, setEdges] = React.useState< + LineageEdge[] + >([]) + const [nodesMap, setNodesMap] = React.useState< + LineageNodesMap + >({}) + const [showOnlySelectedNodes, setShowOnlySelectedNodes] = + React.useState(false) + const [selectedNodes, setSelectedNodes] = React.useState>( + new Set(), + ) + const [selectedEdges, setSelectedEdges] = React.useState>( + new Set(), + ) + const [selectedNodeId, setSelectedNodeId] = + React.useState(null) + + const [showColumns, setShowColumns] = React.useState(false) + const [columnLevelLineage, setColumnLevelLineage] = React.useState< + Map> + >(new Map()) + const [fetchingColumns, setFetchingColumns] = React.useState< + Set + >(new Set()) + + const { + adjacencyListColumnLevel, + selectedColumns, + adjacencyListKeysColumnLevel, + } = useColumnLevelLineage( + columnLevelLineage, + ) + + const adjacencyListKeys = React.useMemo(() => { + let keys: ModelName[] = [] + + if (adjacencyListKeysColumnLevel.length > 0) { + keys = adjacencyListKeysColumnLevel + } else { + keys = Object.keys(adjacencyList) as ModelName[] + } + + return keys + }, 
[adjacencyListKeysColumnLevel, adjacencyList])
+
+  const transformNode = React.useCallback(
+    (nodeId: ModelNodeId, detail: ModelLineageNodeDetails) => {
+      const columns = detail.columns
+
+      const node = createNode('node', nodeId, {
+        name: detail.name,
+        identifier: detail.identifier,
+        model_type: detail.model_type as NodeType,
+        kind: detail.kind!,
+        cron: detail.cron,
+        displayName: detail.display_name,
+        owner: detail.owner!,
+        dialect: detail.dialect,
+        version: detail.version,
+        tags: detail.tags || [],
+        columns,
+      })
+      const selectedColumnsCount = new Set(
+        Object.keys(columns ?? {}).map(k => toPortID(detail.name, k)),
+      ).intersection(selectedColumns).size
+      // We are projecting the node height ahead of layout, so the ceiling and floor heights are included
+      const nodeBaseHeight = calculateNodeBaseHeight({
+        includeNodeFooterHeight: false,
+        includeCeilingHeight: true,
+        includeFloorHeight: true,
+      })
+      const nodeDetailsHeight = calculateNodeDetailsHeight({
+        nodeDetailsCount: 0,
+      })
+      const selectedColumnsHeight =
+        calculateSelectedColumnsHeight(selectedColumnsCount)
+
+      const columnsHeight = calculateColumnsHeight({
+        columnsCount: calculateNodeColumnsCount(
+          Object.keys(columns ?? {}).length,
+        ),
+        hasColumnsFilter:
+          Object.keys(columns ?? {}).length > MAX_COLUMNS_TO_DISPLAY,
+      })
+
+      node.height =
+        nodeBaseHeight +
+        nodeDetailsHeight +
+        selectedColumnsHeight +
+        columnsHeight
+
+      return node
+    },
+    [selectedColumns],
+  )
+
+  const transformedNodesMap = React.useMemo(() => {
+    return getTransformedNodes<
+      ModelName,
+      ModelLineageNodeDetails,
+      NodeData,
+      ModelNodeId
+    >(adjacencyListKeys, lineageDetails, transformNode)
+  }, [adjacencyListKeys, lineageDetails, transformNode])
+
+  const transformEdge = React.useCallback(
+    (
+      edgeType: string,
+      edgeId: ModelEdgeId,
+      sourceId: ModelNodeId,
+      targetId: ModelNodeId,
+      sourceHandleId?: ModelColumnID,
+      targetHandleId?: ModelColumnID,
+    ) => {
+      const sourceNode = transformedNodesMap[sourceId]
+      const targetNode = transformedNodesMap[targetId]
+      const data: EdgeData = {}
+
+      if (sourceHandleId) {
+        data.startColor = 'var(--color-lineage-node-port-edge-source)'
+      } else {
+        if (sourceNode?.data?.model_type) {
+          data.startColor = getNodeTypeColorVar(
+            sourceNode.data.model_type as NodeType,
+          )
+        }
+      }
+
+      if (targetHandleId) {
+        data.endColor = 'var(--color-lineage-node-port-edge-target)'
+      } else {
+        if (targetNode?.data?.model_type) {
+          data.endColor = getNodeTypeColorVar(
+            targetNode.data.model_type as NodeType,
+          )
+        }
+      }
+
+      if (sourceHandleId && targetHandleId) {
+        data.strokeWidth = 2
+      }
+
+      return createEdge(
+        edgeType,
+        edgeId,
+        sourceId,
+        targetId,
+        sourceHandleId,
+        targetHandleId,
+        data,
+      )
+    },
+    [transformedNodesMap],
+  )
+
+  const edgesColumnLevel = React.useMemo(
+    () =>
+      getEdgesFromColumnLineage<
+        ModelName,
+        ColumnName,
+        EdgeData,
+        ModelEdgeId,
+        ModelNodeId,
+        ModelColumnID
+      >({
+        columnLineage: adjacencyListColumnLevel,
+        transformEdge,
+      }),
+    [adjacencyListColumnLevel, transformEdge],
+  )
+
+  const transformedEdges = React.useMemo(() => {
+    return edgesColumnLevel.length > 0
+      ?
edgesColumnLevel + : getTransformedModelEdgesSourceTargets< + ModelName, + EdgeData, + ModelNodeId, + ModelEdgeId, + ModelColumnID + >(adjacencyListKeys, adjacencyList, transformEdge) + }, [adjacencyListKeys, adjacencyList, transformEdge, edgesColumnLevel]) + + const calculateLayout = React.useMemo(() => { + return debounce( + ( + eds: LineageEdge[], + nds: LineageNodesMap, + ) => + getLayoutedGraph( + eds, + nds, + new URL('./dagreLayout.worker.ts', import.meta.url), + ) + .then(({ edges, nodesMap }) => { + setEdges(edges) + setNodesMap(nodesMap) + }) + .catch(error => { + console.error('Layout processing failed:', error) + setEdges([]) + setNodesMap({}) + }) + .finally(() => { + setIsBuildingLayout(false) + }), + 200, + ) + }, []) + + const nodes = React.useMemo(() => { + return Object.values(nodesMap) + }, [nodesMap]) + + const currentNode = React.useMemo(() => { + return selectedModelName + ? nodesMap[toNodeID(selectedModelName)] + : null + }, [selectedModelName, nodesMap]) + + const handleReset = React.useCallback(() => { + setShowColumns(false) + setEdges([]) + setNodesMap({}) + setShowOnlySelectedNodes(false) + setSelectedNodes(new Set()) + setSelectedEdges(new Set()) + setSelectedNodeId(null) + setColumnLevelLineage(new Map()) + }, []) + + React.useEffect(() => { + setIsBuildingLayout(true) + + if (showOnlySelectedNodes) { + const onlySelectedNodesMap = getOnlySelectedNodes( + transformedNodesMap, + selectedNodes, + ) + const onlySelectedEdges = transformedEdges.filter(edge => + selectedEdges.has(edge.id as ModelEdgeId), + ) + calculateLayout(onlySelectedEdges, onlySelectedNodesMap) + } else { + calculateLayout(transformedEdges, transformedNodesMap) + } + }, [ + calculateLayout, + showOnlySelectedNodes, + transformedEdges, + transformedNodesMap, + ]) + + React.useEffect(() => { + const currentNodeId = selectedModelName + ? toNodeID(selectedModelName) + : undefined + + if (currentNodeId && currentNodeId in nodesMap) { + setSelectedNodeId(currentNodeId) + } else { + handleReset() + } + }, [handleReset, selectedModelName]) + + // Cleanup worker on unmount + React.useEffect(() => () => cleanupLayoutWorker(), []) + + function toggleColumns() { + setShowColumns(prev => !prev) + } + + return ( + + + useLineage={useModelLineage} + nodeTypes={nodeTypes} + edgeTypes={edgeTypes} + className={className} + controls={ + <> + toggleColumns()} + disabled={isBuildingLayout} + > + {showColumns ? 
( + + ) : ( + + )} + + handleReset()} + disabled={isBuildingLayout} + > + + + + } + /> + + ) +} diff --git a/web/common/src/components/Lineage/stories/ModelLineageContext.ts b/web/common/src/components/Lineage/stories/ModelLineageContext.ts new file mode 100644 index 0000000000..98d2131766 --- /dev/null +++ b/web/common/src/components/Lineage/stories/ModelLineageContext.ts @@ -0,0 +1,97 @@ +import type { Branded } from '@/types' +import { + type ColumnLevelLineageAdjacencyList, + type ColumnLevelLineageContextValue, + getColumnLevelLineageContextInitial, +} from '../LineageColumnLevel/ColumnLevelLineageContext' +import { type Column } from '../LineageColumnLevel/useColumns' +import { + type LineageContextValue, + createLineageContext, + getInitial as getLineageContextInitial, +} from '../LineageContext' +import { type PathType } from '../utils' + +export type ModelName = Branded +export type ColumnName = Branded +export type ModelColumnID = Branded +export type ModelNodeId = Branded +export type ModelEdgeId = Branded +export type ModelColumn = Column & { + id: ModelColumnID + name: ColumnName + columnLineageData?: ColumnLevelLineageAdjacencyList +} + +export type NodeType = 'sql' | 'python' +export type ModelLineageNodeDetails = { + name: ModelName + display_name: string + identifier: string + version: string + dialect: string + cron: string + owner?: string + kind?: string + model_type?: string + tags?: string[] + columns?: Record +} + +export type NodeData = { + name: ModelName + displayName: string + model_type: NodeType + identifier: string + version: string + kind: string + cron: string + owner: string + dialect: string + columns?: Record + tags: string[] +} + +export type EdgeData = { + pathType?: PathType + startColor?: string + endColor?: string + strokeWidth?: number +} + +export type ModelLineageContextValue = ColumnLevelLineageContextValue< + ModelName, + ColumnName, + ModelColumnID +> & + LineageContextValue< + NodeData, + EdgeData, + ModelNodeId, + ModelEdgeId, + ModelColumnID + > + +export const initial = { + ...getLineageContextInitial(), + ...getColumnLevelLineageContextInitial< + ModelName, + ColumnName, + ModelColumnID + >(), +} + +export const { Provider, useLineage } = createLineageContext< + NodeData, + EdgeData, + ModelNodeId, + ModelEdgeId, + ModelColumnID, + ModelLineageContextValue +>(initial) + +export const ModelLineageContext = { + Provider, +} + +export const useModelLineage = useLineage diff --git a/web/common/src/components/Lineage/stories/ModelNode.tsx b/web/common/src/components/Lineage/stories/ModelNode.tsx new file mode 100644 index 0000000000..2f4705f1c1 --- /dev/null +++ b/web/common/src/components/Lineage/stories/ModelNode.tsx @@ -0,0 +1,331 @@ +import cronstrue from 'cronstrue' +import React from 'react' + +import { cn } from '@/utils' +import { HorizontalContainer } from '../../HorizontalContainer/HorizontalContainer' +import { VerticalContainer } from '../../VerticalContainer/VerticalContainer' +import { + MAX_COLUMNS_TO_DISPLAY, + calculateColumnsHeight, + calculateNodeColumnsCount, + calculateSelectedColumnsHeight, +} from '../LineageColumnLevel/help' +import { useColumns, type Column } from '../LineageColumnLevel/useColumns' +import { calculateNodeBaseHeight, calculateNodeDetailsHeight } from '../help' +import { NodeAppendix } from '../node/NodeAppendix' +import { NodeBadge } from '../node/NodeBadge' +import { NodeBase } from '../node/NodeBase' +import { NodeContainer } from '../node/NodeContainer' +import { NodeHandleIcon } from 
'../node/NodeHandleIcon'
+import { NodeHandles } from '../node/NodeHandles'
+import { NodeHeader } from '../node/NodeHeader'
+import { useNodeMetadata, type NodeProps } from '../node/useNodeMetadata'
+import { ZOOM_THRESHOLD } from '../utils'
+import {
+  type ModelName as ModelNameType,
+  type ColumnName,
+  type NodeData,
+  useModelLineage,
+  type ModelColumn,
+  type ModelNodeId,
+  type ModelColumnID,
+  type NodeType,
+} from './ModelLineageContext'
+import { ModelNodeColumn } from './ModelNodeColumn'
+import {
+  getNodeTypeBorderColor,
+  getNodeTypeColor,
+  getNodeTypeTextColor,
+} from './help'
+import { Tooltip } from '@/components/Tooltip/Tooltip'
+import type { ColumnLevelLineageAdjacencyList } from '../LineageColumnLevel/ColumnLevelLineageContext'
+import { ModelName } from '@/components/ModelName/ModelName'
+import { Badge } from '@/components/Badge/Badge'
+import { NodePorts } from '../node/NodePorts'
+
+export const ModelNode = React.memo(function ModelNode({
+  id,
+  data,
+  ...props
+}: NodeProps) {
+  const {
+    selectedColumns,
+    zoom,
+    currentNode,
+    selectedNodeId,
+    selectedNodes,
+    showColumns,
+    fetchingColumns,
+    setSelectedNodeId,
+  } = useModelLineage()
+
+  const [showNodeColumns, setShowNodeColumns] = React.useState(showColumns)
+  const [isHovered, setIsHovered] = React.useState(false)
+
+  const nodeId = id as ModelNodeId
+
+  const {
+    leftId,
+    rightId,
+    isSelected, // if selected from inside the lineage and this node is the selected one
+    isActive, // if selected from inside the lineage and this node is not selected but is on the path
+  } = useNodeMetadata(nodeId, currentNode, selectedNodeId, selectedNodes)
+
+  const {
+    columns,
+    selectedColumns: modelSelectedColumns,
+    columnNames,
+  } = useColumns(
+    selectedColumns,
+    data.name,
+    data.columns,
+  )
+
+  const hasSelectedColumns = selectedColumns.intersection(columnNames).size > 0
+  const hasFetchingColumns = fetchingColumns.intersection(columnNames).size > 0
+
+  React.useEffect(() => {
+    setShowNodeColumns(showColumns || isSelected)
+  }, [columnNames, isSelected, showColumns])
+
+  function toggleSelectedNode() {
+    setSelectedNodeId(prev => (prev === nodeId ? null : nodeId))
+  }
+
+  const shouldShowColumns =
+    showNodeColumns || hasSelectedColumns || hasFetchingColumns || isHovered
+  const modelType = data.model_type?.toLowerCase() as NodeType
+  const hasColumnsFilter =
+    shouldShowColumns && columns.length > MAX_COLUMNS_TO_DISPLAY
+  // We are not including the footer, because we need the actual height to dynamically adjust the node container height
+  const nodeBaseHeight = calculateNodeBaseHeight({
+    includeNodeFooterHeight: false,
+    includeCeilingHeight: false,
+    includeFloorHeight: false,
+  })
+  const nodeDetailsHeight =
+    zoom > ZOOM_THRESHOLD
+      ? calculateNodeDetailsHeight({
+          nodeDetailsCount: 0,
+        })
+      : 0
+  const selectedColumnsHeight = calculateSelectedColumnsHeight(
+    modelSelectedColumns.length,
+  )
+  const columnsHeight =
+    zoom > ZOOM_THRESHOLD && shouldShowColumns
+      ? calculateColumnsHeight({
+          columnsCount: calculateNodeColumnsCount(columns.length),
+          hasColumnsFilter,
+        })
+      : 0
+
+  // If zoom is less than ZOOM_THRESHOLD, we make the node look bigger
+  const nodeHeight =
+    (zoom > ZOOM_THRESHOLD ?
nodeBaseHeight : nodeBaseHeight * 2) + + nodeDetailsHeight + + selectedColumnsHeight + + columnsHeight + + return ( + setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} + > + + + {zoom > ZOOM_THRESHOLD && ( + <> + {data.kind?.toUpperCase()} + {data.cron && ( + + {data.cron.toUpperCase()} + + } + className="text-xs p-2 rounded-md font-semibold" + > + + UTC Time + {cronstrue.toString(data.cron, { + dayOfWeekStartIndexZero: true, + use24HourTimeFormat: true, + verbose: true, + })} + + + )} + + )} + + + + ZOOM_THRESHOLD ? 'shrink-0 h-7' : 'h-full')} + onClick={toggleSelectedNode} + > + + } + rightIcon={ + + } + handleClassName="top-4" + > + + ZOOM_THRESHOLD + ? ' text-xs' + : 'text-2xl justify-center', + )} + /> + + + + {shouldShowColumns && ( + <> + {modelSelectedColumns.length > 0 && ( + + {modelSelectedColumns.map(column => ( + + } + ).columnLineageData + } + /> + ))} + + )} + {columns.length > 0 && zoom > ZOOM_THRESHOLD && ( + + ports={columns} + estimatedListItemHeight={24} + isFilterable={hasColumnsFilter} + filterOptions={{ + keys: ['name', 'description'], + threshold: 0.3, + }} + renderPort={column => ( + + } + ).columnLineageData + } + /> + )} + className="border-t border-lineage-divider" + /> + )} + + )} + + {modelType && ( + + ZOOM_THRESHOLD ? 'h-5' : 'h-8', + )} + > + ZOOM_THRESHOLD ? '2xs' : 'm'} + className={cn( + 'text-[white] font-black', + getNodeTypeColor(modelType), + )} + > + {modelType.toUpperCase()} + + + + )} + + ) +}) diff --git a/web/common/src/components/Lineage/stories/ModelNodeColumn.tsx b/web/common/src/components/Lineage/stories/ModelNodeColumn.tsx new file mode 100644 index 0000000000..35d4a0e592 --- /dev/null +++ b/web/common/src/components/Lineage/stories/ModelNodeColumn.tsx @@ -0,0 +1,76 @@ +import React from 'react' + +import { type ColumnLevelLineageAdjacencyList } from '../LineageColumnLevel/ColumnLevelLineageContext' +import { FactoryColumn } from '../LineageColumnLevel/FactoryColumn' + +import { + useModelLineage, + type ModelColumnID, + type ModelName, + type ModelNodeId, + type ColumnName, +} from './ModelLineageContext' + +const ModelColumn = FactoryColumn< + ModelName, + ColumnName, + ModelNodeId, + ModelColumnID +>(useModelLineage) + +export const ModelNodeColumn = React.memo(function ModelNodeColumn({ + id, + nodeId, + modelName, + name, + description, + type, + className, + columnLineageData, +}: { + id: ModelColumnID + nodeId: ModelNodeId + modelName: ModelName + name: ColumnName + type: string + description?: string | null + className?: string + columnLineageData?: ColumnLevelLineageAdjacencyList +}) { + const { selectedColumns, setColumnLevelLineage } = useModelLineage() + + const isSelectedColumn = selectedColumns.has(id) + + async function toggleSelectedColumn() { + if (isSelectedColumn) { + setColumnLevelLineage(prev => { + prev.delete(id) + return new Map(prev) + }) + } else { + if (columnLineageData != null) { + setColumnLevelLineage(prev => new Map(prev).set(id, columnLineageData)) + } + } + } + + return ( + console.log('cancel')} + renderError={error =>
<div>Error: {error.message}</div>
} + renderExpression={expression =>
<div>{expression}</div>
} + renderSource={source =>
<div>{source}</div>
} + /> + ) +}) diff --git a/web/common/src/components/Lineage/stories/dagreLayout.worker.ts b/web/common/src/components/Lineage/stories/dagreLayout.worker.ts new file mode 100644 index 0000000000..1a6a9d3fe7 --- /dev/null +++ b/web/common/src/components/Lineage/stories/dagreLayout.worker.ts @@ -0,0 +1,24 @@ +import { + type LayoutedGraph, + type LineageEdgeData, + type LineageNodeData, +} from '../utils' +import { buildLayout } from '../layout/dagreLayout' + +self.onmessage = < + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, +>( + event: MessageEvent>, +) => { + try { + const { edges, nodesMap } = buildLayout(event.data) + + self.postMessage({ + edges, + nodesMap, + } as LayoutedGraph) + } catch (outerError) { + self.postMessage({ error: outerError } as { error: ErrorEvent }) + } +} diff --git a/web/common/src/components/Lineage/stories/help.ts b/web/common/src/components/Lineage/stories/help.ts new file mode 100644 index 0000000000..f26c8c5752 --- /dev/null +++ b/web/common/src/components/Lineage/stories/help.ts @@ -0,0 +1,29 @@ +import { type NodeType } from './ModelLineageContext' + +export function getNodeTypeColorVar(nodeType: NodeType) { + return { + sql: 'var(--color-lineage-node-type-background-sql)', + python: 'var(--color-lineage-node-type-background-python)', + }[nodeType] +} + +export function getNodeTypeColor(nodeType: NodeType) { + return { + sql: 'bg-lineage-node-type-background-sql', + python: 'bg-lineage-node-type-background-python', + }[nodeType] +} + +export function getNodeTypeTextColor(nodeType: NodeType) { + return { + sql: 'text-lineage-node-type-foreground-sql', + python: 'text-lineage-node-type-foreground-python', + }[nodeType] +} + +export function getNodeTypeBorderColor(nodeType: NodeType) { + return { + sql: 'border-lineage-node-type-border-sql', + python: 'border-lineage-node-type-border-python', + }[nodeType] +} diff --git a/web/common/src/components/Lineage/utils.ts b/web/common/src/components/Lineage/utils.ts new file mode 100644 index 0000000000..01a277f17a --- /dev/null +++ b/web/common/src/components/Lineage/utils.ts @@ -0,0 +1,108 @@ +import type { Branded } from '@/types' +import { type Edge, type Node } from '@xyflow/react' + +export type NodeId = Branded +export type EdgeId = Branded +export type PortId = Branded + +export type LineageNodeData = Record +export type LineageEdgeData = Record + +export type LineageAdjacencyList = + Record + +export type LineageDetails = Record< + TAdjacencyListKey, + TValue +> + +export type LineageNodesMap< + TNodeData extends LineageNodeData, + TNodeID extends string = NodeId, +> = Record> +export interface LineageNode< + TNodeData extends LineageNodeData, + TNodeID extends string = NodeId, +> extends Node { + id: TNodeID +} + +export interface LineageEdge< + TEdgeData extends LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +> extends Edge { + id: TEdgeID + source: TNodeID + target: TNodeID + sourceHandle?: TPortID + targetHandle?: TPortID +} + +export type LayoutedGraph< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +> = { + edges: LineageEdge[] + nodesMap: LineageNodesMap +} + +export type PathType = 'bezier' | 'smoothstep' | 'step' | 'straight' +export type TransformNodeFn< + TData, + TNodeData extends 
LineageNodeData = LineageNodeData,
+  TNodeID extends string = NodeId,
+> = (nodeId: TNodeID, data: TData) => LineageNode<TNodeData, TNodeID>
+
+export type TransformEdgeFn<
+  TEdgeData extends LineageEdgeData = LineageEdgeData,
+  TNodeID extends string = NodeId,
+  TEdgeID extends string = EdgeId,
+  TPortID extends string = PortId,
+> = (
+  edgeType: string,
+  edgeId: TEdgeID,
+  sourceId: TNodeID,
+  targetId: TNodeID,
+  sourceColumnId?: TPortID,
+  targetColumnId?: TPortID,
+) => LineageEdge<TEdgeData, TNodeID, TEdgeID, TPortID>
+
+export const DEFAULT_NODE_HEIGHT = 32
+export const DEFAULT_NODE_WIDTH = 300
+export const DEFAULT_ZOOM = 0.85
+export const MIN_ZOOM = 0.01
+export const MAX_ZOOM = 1.75
+export const ZOOM_THRESHOLD = 0.75
+export const NODES_TRESHOLD = 200
+export const NODES_TRESHOLD_ZOOM = 0.1
+
+// IDs generated by toInternalID are meant to be used only internally, to identify nodes, edges and ports within the graph
+// Do not rely on the ID being a valid URL or on anything outside of the graph
+export function toInternalID<TReturn extends string = string>(
+  ...args: string[]
+): TReturn {
+  return encodeURI(args.filter(Boolean).join('.')) as TReturn
+}
+
+export function toNodeID<TNodeID extends string = NodeId>(
+  ...args: string[]
+): TNodeID {
+  return toInternalID(...args)
+}
+
+export function toEdgeID<TEdgeID extends string = EdgeId>(
+  ...args: string[]
+): TEdgeID {
+  return toInternalID(...args)
+}
+
+export function toPortID<TPortId extends string = PortId>(
+  ...args: string[]
+): TPortId {
+  return toInternalID(...args)
+}
diff --git a/web/common/src/components/MessageContainer/MessageContainer.css b/web/common/src/components/MessageContainer/MessageContainer.css
new file mode 100644
index 0000000000..f632bc791f
--- /dev/null
+++ b/web/common/src/components/MessageContainer/MessageContainer.css
@@ -0,0 +1,3 @@
+:root {
+  --color-message-translucid: var(--color-neutral-3);
+}
diff --git a/web/common/src/components/MessageContainer/MessageContainer.tsx b/web/common/src/components/MessageContainer/MessageContainer.tsx
index d51213bfaf..16d35ea47d 100644
--- a/web/common/src/components/MessageContainer/MessageContainer.tsx
+++ b/web/common/src/components/MessageContainer/MessageContainer.tsx
@@ -2,6 +2,8 @@ import { cn } from '@/utils'
 import { LoadingContainer } from '../LoadingContainer/LoadingContainer'
 import { HorizontalContainer } from '../HorizontalContainer/HorizontalContainer'
 
+import './MessageContainer.css'
+
 export interface MessageContainerProps {
   children: React.ReactNode
   className?: string
@@ -19,7 +21,7 @@ export function MessageContainer({
 
diff --git a/web/common/src/components/Metadata/Metadata.css b/web/common/src/components/Metadata/Metadata.css
new file mode 100644
index 0000000000..b1f5f0dfeb
--- /dev/null
+++ b/web/common/src/components/Metadata/Metadata.css
@@ -0,0 +1,4 @@
+:root {
+  --color-metadata-label: var(--color-neutral-600);
+  --color-metadata-value: var(--color-prose);
+}
diff --git a/web/common/src/components/ModelName/ModelName.tsx b/web/common/src/components/ModelName/ModelName.tsx
index 0685d4b872..83013d8108 100644
--- a/web/common/src/components/ModelName/ModelName.tsx
+++ b/web/common/src/components/ModelName/ModelName.tsx
@@ -144,7 +144,13 @@
             : 'text-model-name-model',
         )}
       >
-        {truncate(model, truncateMaxCharsModel, 15)}
+        {truncate(
+          model,
+          truncateMaxCharsModel,
+          truncateLimitBefore * 2,
+          '...',
+          truncateLimitBefore * 2,
+        )}
 
     )
diff --git a/web/common/src/components/Typography/Information.tsx b/web/common/src/components/Typography/Information.tsx
index d0da7622d2..d4fc0f2b83 100644
--- a/web/common/src/components/Typography/Information.tsx
+++
b/web/common/src/components/Typography/Information.tsx @@ -47,7 +47,7 @@ export function Information({ sideOffset={sideOffset} side={side} className={cn( - 'z-50 select-none max-w-md whitespace-wrap rounded-md bg-dark text-light px-4 py-2 shadow-[hsl(206_22%_7%_/_35%)_0px_10px_38px_-10px,_hsl(206_22%_7%_/_20%)_0px_10px_20px_-15px] will-change-[transform,opacity] data-[state=delayed-open]:data-[side=bottom]:animate-slideUpAndFade data-[state=delayed-open]:data-[side=left]:animate-slideRightAndFade data-[state=delayed-open]:data-[side=right]:animate-slideLeftAndFade data-[state=delayed-open]:data-[side=top]:animate-slideDownAndFade', + 'z-50 select-none whitespace-wrap rounded-md', getTextSize(size), classNameTooltip, )} diff --git a/web/common/src/components/VirtualList/FilterableList.css b/web/common/src/components/VirtualList/FilterableList.css new file mode 100644 index 0000000000..4dfdd87eea --- /dev/null +++ b/web/common/src/components/VirtualList/FilterableList.css @@ -0,0 +1,9 @@ +:root { + --color-filterable-list-counter-background: var(--color-badge-background); + --color-filterable-list-counter-foreground: var(--color-badge-foreground); + + --color-filterable-list-input-background: var(--color-input-background); + --color-filterable-list-input-foreground: var(--color-input-foreground); + --color-filterable-list-input-placeholder: var(--color-input-placeholder); + --color-filterable-list-input-border: var(--color-input-border); +} diff --git a/web/common/src/components/VirtualList/FilterableList.tsx b/web/common/src/components/VirtualList/FilterableList.tsx index ba6c5950b5..5ea0d35039 100644 --- a/web/common/src/components/VirtualList/FilterableList.tsx +++ b/web/common/src/components/VirtualList/FilterableList.tsx @@ -8,6 +8,8 @@ import { cn } from '@/utils' import { MessageContainer } from '../MessageContainer/MessageContainer' import { Input } from '../Input/Input' +import './FilterableList.css' + export interface FilterableListProps { items: TItem[] filterOptions?: IFuseOptions @@ -83,7 +85,10 @@ function Counter({ return ( {itemsLength !== filteredItemsLength && ( <> diff --git a/web/common/src/components/VirtualList/VirtualList.tsx b/web/common/src/components/VirtualList/VirtualList.tsx index 94e5d93c05..adf1010508 100644 --- a/web/common/src/components/VirtualList/VirtualList.tsx +++ b/web/common/src/components/VirtualList/VirtualList.tsx @@ -1,4 +1,8 @@ -import { useVirtualizer } from '@tanstack/react-virtual' +import { + useVirtualizer, + Virtualizer, + type VirtualItem, +} from '@tanstack/react-virtual' import React from 'react' import { HorizontalContainer } from '../HorizontalContainer/HorizontalContainer' import { cn } from '@/utils' @@ -9,7 +13,11 @@ import { VerticalContainer } from '../VerticalContainer/VerticalContainer' export interface VirtualListProps { items: TItem[] estimatedListItemHeight: number - renderListItem: (item: TItem) => React.ReactNode + renderListItem: ( + item: TItem, + virtualItem?: VirtualItem, + virtualizer?: Virtualizer, + ) => React.ReactNode isSelected?: (item: TItem) => boolean className?: string } diff --git a/web/common/src/styles/design/semantic-colors.css b/web/common/src/styles/design/semantic-colors.css index 4217b7f654..c329960ce8 100644 --- a/web/common/src/styles/design/semantic-colors.css +++ b/web/common/src/styles/design/semantic-colors.css @@ -68,14 +68,4 @@ --color-typography-tagline: var(--color-neutral-600); --color-typography-description: var(--color-neutral-500); --color-typography-info: var(--color-typography-tagline); 
- - /* Message */ - --color-message-lucid: var(--color-neutral-3); - - /* Input */ - --color-input-background: var(--color-light); - --color-input-background-lucid: var(--color-neutral-5); - --color-input-foreground: var(--color-prose); - --color-input-placeholder: var(--color-neutral-400); - --color-input-border: var(--color-neutral-300); } diff --git a/web/common/tailwind.base.config.js b/web/common/tailwind.base.config.js index cbba9768c2..49354591cc 100644 --- a/web/common/tailwind.base.config.js +++ b/web/common/tailwind.base.config.js @@ -1,5 +1,9 @@ -/** @type {import('tailwindcss').Config} */ -module.exports = { +import lineageConfig from './tailwind.lineage.config' +import typography from '@tailwindcss/typography' +import scrollbar from 'tailwind-scrollbar' + +export default { + presets: [lineageConfig], theme: { colors: {}, extend: { @@ -43,7 +47,7 @@ module.exports = { info: 'var(--color-typography-info)', }, message: { - lucid: 'var(--color-message-lucid)', + translucid: 'var(--color-message-translucid)', }, link: { underline: 'var(--color-link-underline)', @@ -72,8 +76,20 @@ module.exports = { background: 'var(--color-badge-background)', foreground: 'var(--color-badge-foreground)', }, + 'filterable-list': { + counter: { + background: 'var(--color-filterable-list-counter-background)', + foreground: 'var(--color-filterable-list-counter-foreground)', + }, + input: { + background: 'var(--color-filterable-list-input-background)', + foreground: 'var(--color-filterable-list-input-foreground)', + placeholder: 'var(--color-filterable-list-input-placeholder)', + border: 'var(--color-filterable-list-input-border)', + }, + }, input: { - 'background-lucid': 'var(--color-input-background-lucid)', + 'background-translucid': 'var(--color-input-background-translucid)', background: 'var(--color-input-background)', foreground: 'var(--color-input-foreground)', placeholder: 'var(--color-input-placeholder)', @@ -121,6 +137,10 @@ module.exports = { background: 'var(--color-tooltip-background)', foreground: 'var(--color-tooltip-foreground)', }, + metadata: { + label: 'var(--color-metadata-label)', + value: 'var(--color-metadata-value)', + }, }, borderRadius: { '2xs': 'var(--radius-xs)', @@ -148,8 +168,8 @@ module.exports = { }, }, plugins: [ - require('@tailwindcss/typography'), - require('tailwind-scrollbar')({ + typography, + scrollbar({ nocompatible: true, preferredStrategy: 'pseudoelements', }), diff --git a/web/common/tailwind.config.js b/web/common/tailwind.config.js index 67fe2ac528..4e7eee7f2f 100644 --- a/web/common/tailwind.config.js +++ b/web/common/tailwind.config.js @@ -1,5 +1,6 @@ -/** @type {import('tailwindcss').Config} */ -module.exports = { +import baseConfig from './tailwind.base.config' + +export default { + presets: [baseConfig], content: ['./src/**/*.{js,ts,jsx,tsx}', './src/**/*.stories.{js,ts,jsx,tsx}'], - presets: [require('./tailwind.base.config')], } diff --git a/web/common/tailwind.lineage.config.js b/web/common/tailwind.lineage.config.js new file mode 100644 index 0000000000..c2c8800a6f --- /dev/null +++ b/web/common/tailwind.lineage.config.js @@ -0,0 +1,95 @@ +export default { + theme: { + colors: {}, + extend: { + colors: { + lineage: { + background: 'var(--color-lineage-background)', + divider: 'var(--color-lineage-divider)', + border: 'var(--color-lineage-border)', + control: { + background: { + DEFAULT: 'var(--color-lineage-control-background)', + hover: 'var(--color-lineage-control-background-hover)', + }, + icon: { + background: 
'var(--color-lineage-control-icon-background)', + foreground: 'var(--color-lineage-control-icon-foreground)', + }, + button: { + tooltip: { + background: + 'var(--color-lineage-control-button-tooltip-background)', + foreground: + 'var(--color-lineage-control-button-tooltip-foreground)', + }, + }, + }, + grid: { + dot: 'var(--color-lineage-grid-dot)', + }, + edge: { + DEFAULT: 'var(--color-lineage-edge)', + }, + node: { + background: 'var(--color-lineage-node-background)', + foreground: 'var(--color-lineage-node-foreground)', + selected: { + border: 'var(--color-lineage-node-selected-border)', + }, + border: { + DEFAULT: 'var(--color-lineage-node-border)', + hover: 'var(--color-lineage-node-border-hover)', + }, + badge: { + background: 'var(--color-lineage-node-badge-background)', + foreground: 'var(--color-lineage-node-badge-foreground)', + }, + appendix: { + background: 'var(--color-lineage-node-appendix-background)', + }, + handle: { + icon: { + background: + 'var(--color-lineage-node-type-handle-icon-background)', + }, + }, + port: { + background: 'var(--color-lineage-node-port-background)', + handle: { + target: 'var(--color-lineage-node-port-handle-target)', + source: 'var(--color-lineage-node-port-handle-source)', + }, + edge: { + source: 'var(--color-lineage-node-port-edge-source)', + target: 'var(--color-lineage-node-port-edge-target)', + }, + }, + }, + model: { + column: { + source: { + background: + 'var(--color-lineage-model-column-source-background)', + }, + expression: { + background: + 'var(--color-lineage-model-column-expression-background)', + }, + error: { + background: + 'var(--color-lineage-model-column-error-background)', + icon: 'var(--color-lineage-model-column-error-icon)', + }, + active: 'var(--color-lineage-model-column-active)', + icon: { + DEFAULT: 'var(--color-lineage-model-column-icon)', + active: 'var(--color-lineage-model-column-icon-active)', + }, + }, + }, + }, + }, + }, + }, +} diff --git a/web/common/tsconfig.base.json b/web/common/tsconfig.base.json index 99a214fe47..ca7c1e0785 100644 --- a/web/common/tsconfig.base.json +++ b/web/common/tsconfig.base.json @@ -3,7 +3,7 @@ "target": "ES2022", "jsx": "react-jsx", "module": "ESNext", - "lib": ["ES2022", "DOM", "DOM.Iterable"], + "lib": ["ESNext", "DOM", "DOM.Iterable"], "types": ["vite/client"], /* Bundler mode */ diff --git a/web/common/tsconfig.build.json b/web/common/tsconfig.build.json index 7eba394efd..527242427c 100644 --- a/web/common/tsconfig.build.json +++ b/web/common/tsconfig.build.json @@ -15,6 +15,7 @@ "declarationMap": true, "declarationDir": "./dist", "emitDeclarationOnly": false, - "outDir": "./dist" + "outDir": "./dist", + "rootDir": "./src" } } diff --git a/web/common/vite.config.js b/web/common/vite.config.js index 237bed29bd..f123507484 100644 --- a/web/common/vite.config.js +++ b/web/common/vite.config.js @@ -22,6 +22,10 @@ export default defineConfig({ src: 'tailwind.base.config.js', dest: 'configs', }, + { + src: 'tailwind.lineage.config.js', + dest: 'configs', + }, ], }), ], @@ -33,9 +37,19 @@ export default defineConfig({ build: { cssMinify: true, lib: { - entry: path.resolve(__dirname, 'src/index.ts'), + entry: { + 'sqlmesh-common': path.resolve(__dirname, 'src/index.ts'), + 'lineage/index': path.resolve( + __dirname, + 'src/components/Lineage/index.ts', + ), + }, name: 'sqlmesh-common', - fileName: format => `sqlmesh-common.${format}.js`, + fileName: (format, entryName) => + ({ + 'sqlmesh-common': `sqlmesh-common.${format}.js`, + 'lineage/index': `lineage/index.${format}.js`, + 
})[entryName], }, rollupOptions: { external: [ From 42fbc64d0468b741e58d23f77fdd11bb84719c9c Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Thu, 2 Oct 2025 12:26:31 -0700 Subject: [PATCH 040/173] Chore: Cache results of get_data_objects (#5467) --- sqlmesh/core/engine_adapter/base.py | 132 ++++++- sqlmesh/core/engine_adapter/base_postgres.py | 10 +- sqlmesh/core/engine_adapter/bigquery.py | 7 + sqlmesh/core/engine_adapter/clickhouse.py | 27 +- sqlmesh/core/engine_adapter/mssql.py | 9 + sqlmesh/core/engine_adapter/mysql.py | 4 +- sqlmesh/core/engine_adapter/postgres.py | 6 +- sqlmesh/core/engine_adapter/spark.py | 6 +- sqlmesh/core/snapshot/evaluator.py | 71 +++- tests/core/engine_adapter/test_athena.py | 1 + tests/core/engine_adapter/test_base.py | 370 +++++++++++++++++++ tests/core/engine_adapter/test_snowflake.py | 8 +- tests/core/test_snapshot_evaluator.py | 8 + tests/dbt/test_transformation.py | 6 +- 14 files changed, 618 insertions(+), 47 deletions(-) diff --git a/sqlmesh/core/engine_adapter/base.py b/sqlmesh/core/engine_adapter/base.py index 68c6404081..d9cc4f44a2 100644 --- a/sqlmesh/core/engine_adapter/base.py +++ b/sqlmesh/core/engine_adapter/base.py @@ -161,6 +161,7 @@ def __init__( self.correlation_id = correlation_id self._schema_differ_overrides = schema_differ_overrides self._query_execution_tracker = query_execution_tracker + self._data_object_cache: t.Dict[str, t.Optional[DataObject]] = {} def with_settings(self, **kwargs: t.Any) -> EngineAdapter: extra_kwargs = { @@ -983,6 +984,13 @@ def _create_table( ), track_rows_processed=track_rows_processed, ) + # Extract table name to clear cache + table_name = ( + table_name_or_schema.this + if isinstance(table_name_or_schema, exp.Schema) + else table_name_or_schema + ) + self._clear_data_object_cache(table_name) def _build_create_table_exp( self, @@ -1038,7 +1046,8 @@ def create_table_like( target_table_name: The name of the table to create. Can be fully qualified or just table name. source_table_name: The name of the table to base the new table on. """ - self.create_table(target_table_name, self.columns(source_table_name), exists=exists) + self._create_table_like(target_table_name, source_table_name, exists=exists, **kwargs) + self._clear_data_object_cache(target_table_name) def clone_table( self, @@ -1074,6 +1083,7 @@ def clone_table( **kwargs, ) ) + self._clear_data_object_cache(target_table_name) def drop_data_object(self, data_object: DataObject, ignore_if_not_exists: bool = True) -> None: """Drops a data object of arbitrary type. 
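Note: the create/clone/drop hunks in this file all follow the same invalidation contract — any DDL that can change a data object clears that object's cache entry, so the next lookup misses and re-fetches. A standalone sketch of that contract, reusing the key helper this patch adds at the bottom of base.py (the dict mirrors EngineAdapter._data_object_cache; the catalog/schema/table names are made up for illustration):

import typing as t

def _get_data_object_cache_key(catalog: t.Optional[str], schema_name: str, object_name: str) -> str:
    # Body copied from the helper added at the end of this file in this patch.
    catalog = f"{catalog}." if catalog else ""
    return f"{catalog}{schema_name}.{object_name}"

cache: t.Dict[str, t.Optional[object]] = {}

# A successful lookup caches the object; a failed lookup caches None, so
# "known absent" also counts as a hit for table_exists().
cache[_get_data_object_cache_key("cat", "sch", "tbl")] = object()
cache[_get_data_object_cache_key(None, "sch", "missing")] = None

# DDL invalidation: pop exactly the affected key; _clear_data_object_cache(None)
# would instead clear the whole dict.
cache.pop(_get_data_object_cache_key("cat", "sch", "tbl"), None)
assert "cat.sch.tbl" not in cache and "sch.missing" in cache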
@@ -1139,6 +1149,7 @@ def _drop_object( drop_args["cascade"] = cascade self.execute(exp.Drop(this=exp.to_table(name), kind=kind, exists=exists, **drop_args)) + self._clear_data_object_cache(name) def get_alter_operations( self, @@ -1329,6 +1340,8 @@ def create_view( quote_identifiers=self.QUOTE_IDENTIFIERS_IN_VIEWS, ) + self._clear_data_object_cache(view_name) + # Register table comment with commands if the engine doesn't support doing it in CREATE if ( table_description @@ -1458,8 +1471,14 @@ def columns( } def table_exists(self, table_name: TableName) -> bool: + table = exp.to_table(table_name) + data_object_cache_key = _get_data_object_cache_key(table.catalog, table.db, table.name) + if data_object_cache_key in self._data_object_cache: + logger.debug("Table existence cache hit: %s", data_object_cache_key) + return self._data_object_cache[data_object_cache_key] is not None + try: - self.execute(exp.Describe(this=exp.to_table(table_name), kind="TABLE")) + self.execute(exp.Describe(this=table, kind="TABLE")) return True except Exception: return False @@ -2253,24 +2272,34 @@ def rename_table( "Tried to rename table across catalogs which is not supported" ) self._rename_table(old_table_name, new_table_name) + self._clear_data_object_cache(old_table_name) + self._clear_data_object_cache(new_table_name) - def get_data_object(self, target_name: TableName) -> t.Optional[DataObject]: + def get_data_object( + self, target_name: TableName, safe_to_cache: bool = False + ) -> t.Optional[DataObject]: target_table = exp.to_table(target_name) existing_data_objects = self.get_data_objects( - schema_(target_table.db, target_table.catalog), {target_table.name} + schema_(target_table.db, target_table.catalog), + {target_table.name}, + safe_to_cache=safe_to_cache, ) if existing_data_objects: return existing_data_objects[0] return None def get_data_objects( - self, schema_name: SchemaName, object_names: t.Optional[t.Set[str]] = None + self, + schema_name: SchemaName, + object_names: t.Optional[t.Set[str]] = None, + safe_to_cache: bool = False, ) -> t.List[DataObject]: """Lists all data objects in the target schema. Args: schema_name: The name of the schema to list data objects from. object_names: If provided, only return data objects with these names. + safe_to_cache: Whether it is safe to cache the results of this call. Returns: A list of data objects in the target schema. 
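Note: a condensed sketch of the lookup flow the next hunk implements for get_data_objects — cached names (including negative entries) are served directly, only the misses go to the database, and with safe_to_cache=True both found and not-found names are recorded. This is simplified to a single schema with plain strings; fake_fetch stands in for _get_data_objects, and the DATA_OBJECT_FILTER_BATCH_SIZE batching is omitted:

import typing as t

def lookup(
    cache: t.Dict[str, t.Optional[str]],
    fetch: t.Callable[[t.Set[str]], t.Dict[str, str]],
    names: t.Set[str],
    safe_to_cache: bool = False,
) -> t.List[str]:
    cached, missing = [], set()
    for name in names:
        if name in cache:
            value = cache[name]
            if value is not None:  # None means "previously looked up, absent"
                cached.append(value)
        else:
            missing.add(name)
    fetched = fetch(missing) if missing else {}
    if safe_to_cache:
        for name in missing:  # record hits and misses alike
            cache[name] = fetched.get(name)
    return cached + list(fetched.values())

calls: t.List[t.Set[str]] = []

def fake_fetch(names: t.Set[str]) -> t.Dict[str, str]:
    calls.append(names)
    return {n: n for n in names if n != "absent"}

cache: t.Dict[str, t.Optional[str]] = {}
assert sorted(lookup(cache, fake_fetch, {"t1", "absent"}, safe_to_cache=True)) == ["t1"]
assert sorted(lookup(cache, fake_fetch, {"t1", "absent"}, safe_to_cache=True)) == ["t1"]
assert len(calls) == 1  # the second call was served entirely from the cache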
@@ -2278,15 +2307,64 @@ def get_data_objects( if object_names is not None: if not object_names: return [] - object_names_list = list(object_names) - batches = [ - object_names_list[i : i + self.DATA_OBJECT_FILTER_BATCH_SIZE] - for i in range(0, len(object_names_list), self.DATA_OBJECT_FILTER_BATCH_SIZE) - ] - return [ - obj for batch in batches for obj in self._get_data_objects(schema_name, set(batch)) - ] - return self._get_data_objects(schema_name) + + # Check cache for each object name + target_schema = to_schema(schema_name) + cached_objects = [] + missing_names = set() + + for name in object_names: + cache_key = _get_data_object_cache_key( + target_schema.catalog, target_schema.db, name + ) + if cache_key in self._data_object_cache: + logger.debug("Data object cache hit: %s", cache_key) + data_object = self._data_object_cache[cache_key] + # If the object is none, then the table was previously looked for but not found + if data_object: + cached_objects.append(data_object) + else: + logger.debug("Data object cache miss: %s", cache_key) + missing_names.add(name) + + # Fetch missing objects from database + if missing_names: + object_names_list = list(missing_names) + batches = [ + object_names_list[i : i + self.DATA_OBJECT_FILTER_BATCH_SIZE] + for i in range(0, len(object_names_list), self.DATA_OBJECT_FILTER_BATCH_SIZE) + ] + + fetched_objects = [] + fetched_object_names = set() + for batch in batches: + objects = self._get_data_objects(schema_name, set(batch)) + for obj in objects: + if safe_to_cache: + cache_key = _get_data_object_cache_key( + obj.catalog, obj.schema_name, obj.name + ) + self._data_object_cache[cache_key] = obj + fetched_objects.append(obj) + fetched_object_names.add(obj.name) + + if safe_to_cache: + for missing_name in missing_names - fetched_object_names: + cache_key = _get_data_object_cache_key( + target_schema.catalog, target_schema.db, missing_name + ) + self._data_object_cache[cache_key] = None + + return cached_objects + fetched_objects + + return cached_objects + + fetched_objects = self._get_data_objects(schema_name) + if safe_to_cache: + for obj in fetched_objects: + cache_key = _get_data_object_cache_key(obj.catalog, obj.schema_name, obj.name) + self._data_object_cache[cache_key] = obj + return fetched_objects def fetchone( self, @@ -2693,6 +2771,17 @@ def _to_sql(self, expression: exp.Expression, quote: bool = True, **kwargs: t.An return expression.sql(**sql_gen_kwargs, copy=False) # type: ignore + def _clear_data_object_cache(self, table_name: t.Optional[TableName] = None) -> None: + """Clears the cache entry for the given table name, or clears the entire cache if table_name is None.""" + if table_name is None: + logger.debug("Clearing entire data object cache") + self._data_object_cache.clear() + else: + table = exp.to_table(table_name) + cache_key = _get_data_object_cache_key(table.catalog, table.db, table.name) + logger.debug("Clearing data object cache key: %s", cache_key) + self._data_object_cache.pop(cache_key, None) + def _get_data_objects( self, schema_name: SchemaName, object_names: t.Optional[t.Set[str]] = None ) -> t.List[DataObject]: @@ -2878,6 +2967,15 @@ def _create_column_comments( exc_info=True, ) + def _create_table_like( + self, + target_table_name: TableName, + source_table_name: TableName, + exists: bool, + **kwargs: t.Any, + ) -> None: + self.create_table(target_table_name, self.columns(source_table_name), exists=exists) + def _rename_table( self, old_table_name: TableName, @@ -2940,3 +3038,9 @@ def _decoded_str(value: t.Union[str, 
bytes]) -> str: if isinstance(value, bytes): return value.decode("utf-8") return value + + +def _get_data_object_cache_key(catalog: t.Optional[str], schema_name: str, object_name: str) -> str: + """Returns a cache key for a data object based on its fully qualified name.""" + catalog = f"{catalog}." if catalog else "" + return f"{catalog}{schema_name}.{object_name}" diff --git a/sqlmesh/core/engine_adapter/base_postgres.py b/sqlmesh/core/engine_adapter/base_postgres.py index c6ba7d6d62..3de975d6a5 100644 --- a/sqlmesh/core/engine_adapter/base_postgres.py +++ b/sqlmesh/core/engine_adapter/base_postgres.py @@ -1,11 +1,12 @@ from __future__ import annotations import typing as t +import logging from sqlglot import exp from sqlmesh.core.dialect import to_schema -from sqlmesh.core.engine_adapter import EngineAdapter +from sqlmesh.core.engine_adapter.base import EngineAdapter, _get_data_object_cache_key from sqlmesh.core.engine_adapter.shared import ( CatalogSupport, CommentCreationTable, @@ -20,6 +21,9 @@ from sqlmesh.core.engine_adapter._typing import QueryOrDF +logger = logging.getLogger(__name__) + + class BasePostgresEngineAdapter(EngineAdapter): DEFAULT_BATCH_SIZE = 400 COMMENT_CREATION_TABLE = CommentCreationTable.COMMENT_COMMAND_ONLY @@ -75,6 +79,10 @@ def table_exists(self, table_name: TableName) -> bool: Reference: https://github.com/aws/amazon-redshift-python-driver/blob/master/redshift_connector/cursor.py#L528-L553 """ table = exp.to_table(table_name) + data_object_cache_key = _get_data_object_cache_key(table.catalog, table.db, table.name) + if data_object_cache_key in self._data_object_cache: + logger.debug("Table existence cache hit: %s", data_object_cache_key) + return self._data_object_cache[data_object_cache_key] is not None sql = ( exp.select("1") diff --git a/sqlmesh/core/engine_adapter/bigquery.py b/sqlmesh/core/engine_adapter/bigquery.py index 26abad9ebc..09fd7537ef 100644 --- a/sqlmesh/core/engine_adapter/bigquery.py +++ b/sqlmesh/core/engine_adapter/bigquery.py @@ -8,6 +8,7 @@ from sqlglot.transforms import remove_precision_parameterized_types from sqlmesh.core.dialect import to_schema +from sqlmesh.core.engine_adapter.base import _get_data_object_cache_key from sqlmesh.core.engine_adapter.mixins import ( ClusteredByMixin, RowDiffMixin, @@ -744,6 +745,12 @@ def insert_overwrite_by_partition( ) def table_exists(self, table_name: TableName) -> bool: + table = exp.to_table(table_name) + data_object_cache_key = _get_data_object_cache_key(table.catalog, table.db, table.name) + if data_object_cache_key in self._data_object_cache: + logger.debug("Table existence cache hit: %s", data_object_cache_key) + return self._data_object_cache[data_object_cache_key] is not None + try: from google.cloud.exceptions import NotFound except ModuleNotFoundError: diff --git a/sqlmesh/core/engine_adapter/clickhouse.py b/sqlmesh/core/engine_adapter/clickhouse.py index 84d6ad311e..45c22a6e55 100644 --- a/sqlmesh/core/engine_adapter/clickhouse.py +++ b/sqlmesh/core/engine_adapter/clickhouse.py @@ -224,7 +224,7 @@ def _insert_overwrite_by_condition( target_columns_to_types = target_columns_to_types or self.columns(target_table) temp_table = self._get_temp_table(target_table) - self._create_table_like(temp_table, target_table) + self.create_table_like(temp_table, target_table) # REPLACE BY KEY: extract kwargs if present dynamic_key = kwargs.get("dynamic_key") @@ -456,7 +456,11 @@ def insert_overwrite_by_partition( ) def _create_table_like( - self, target_table_name: TableName, 
source_table_name: TableName + self, + target_table_name: TableName, + source_table_name: TableName, + exists: bool, + **kwargs: t.Any, ) -> None: """Create table with identical structure as source table""" self.execute( @@ -632,16 +636,15 @@ def _drop_object( kind: What kind of object to drop. Defaults to TABLE **drop_args: Any extra arguments to set on the Drop expression """ - self.execute( - exp.Drop( - this=exp.to_table(name), - kind=kind, - exists=exists, - cluster=exp.OnCluster(this=exp.to_identifier(self.cluster)) - if self.engine_run_mode.is_cluster - else None, - **drop_args, - ) + super()._drop_object( + name=name, + exists=exists, + kind=kind, + cascade=cascade, + cluster=exp.OnCluster(this=exp.to_identifier(self.cluster)) + if self.engine_run_mode.is_cluster + else None, + **drop_args, ) def _build_partitioned_by_exp( diff --git a/sqlmesh/core/engine_adapter/mssql.py b/sqlmesh/core/engine_adapter/mssql.py index fd0bf1011b..05c3753f14 100644 --- a/sqlmesh/core/engine_adapter/mssql.py +++ b/sqlmesh/core/engine_adapter/mssql.py @@ -3,6 +3,7 @@ from __future__ import annotations import typing as t +import logging from sqlglot import exp @@ -13,6 +14,7 @@ InsertOverwriteStrategy, MERGE_SOURCE_ALIAS, MERGE_TARGET_ALIAS, + _get_data_object_cache_key, ) from sqlmesh.core.engine_adapter.mixins import ( GetCurrentCatalogFromFunctionMixin, @@ -36,6 +38,9 @@ from sqlmesh.core.engine_adapter._typing import DF, Query, QueryOrDF +logger = logging.getLogger(__name__) + + @set_catalog() class MSSQLEngineAdapter( EngineAdapterWithIndexSupport, @@ -144,6 +149,10 @@ def build_var_length_col( def table_exists(self, table_name: TableName) -> bool: """MsSql doesn't support describe so we query information_schema.""" table = exp.to_table(table_name) + data_object_cache_key = _get_data_object_cache_key(table.catalog, table.db, table.name) + if data_object_cache_key in self._data_object_cache: + logger.debug("Table existence cache hit: %s", data_object_cache_key) + return self._data_object_cache[data_object_cache_key] is not None sql = ( exp.select("1") diff --git a/sqlmesh/core/engine_adapter/mysql.py b/sqlmesh/core/engine_adapter/mysql.py index 26cc7c0197..31773d6c63 100644 --- a/sqlmesh/core/engine_adapter/mysql.py +++ b/sqlmesh/core/engine_adapter/mysql.py @@ -164,11 +164,11 @@ def _create_column_comments( exc_info=True, ) - def create_table_like( + def _create_table_like( self, target_table_name: TableName, source_table_name: TableName, - exists: bool = True, + exists: bool, **kwargs: t.Any, ) -> None: self.execute( diff --git a/sqlmesh/core/engine_adapter/postgres.py b/sqlmesh/core/engine_adapter/postgres.py index e9c212bd5f..79431ee360 100644 --- a/sqlmesh/core/engine_adapter/postgres.py +++ b/sqlmesh/core/engine_adapter/postgres.py @@ -34,7 +34,7 @@ class PostgresEngineAdapter( HAS_VIEW_BINDING = True CURRENT_CATALOG_EXPRESSION = exp.column("current_catalog") SUPPORTS_REPLACE_TABLE = False - MAX_IDENTIFIER_LENGTH = 63 + MAX_IDENTIFIER_LENGTH: t.Optional[int] = 63 SUPPORTS_QUERY_EXECUTION_TRACKING = True SCHEMA_DIFFER_KWARGS = { "parameterized_type_defaults": { @@ -79,11 +79,11 @@ def _fetch_native_df( self._connection_pool.commit() return df - def create_table_like( + def _create_table_like( self, target_table_name: TableName, source_table_name: TableName, - exists: bool = True, + exists: bool, **kwargs: t.Any, ) -> None: self.execute( diff --git a/sqlmesh/core/engine_adapter/spark.py b/sqlmesh/core/engine_adapter/spark.py index 18ba6ea106..b2d6a9cbb5 100644 --- 
a/sqlmesh/core/engine_adapter/spark.py +++ b/sqlmesh/core/engine_adapter/spark.py @@ -402,14 +402,16 @@ def get_current_database(self) -> str: return self.spark.catalog.currentDatabase() return self.fetchone(exp.select(exp.func("current_database")))[0] # type: ignore - def get_data_object(self, target_name: TableName) -> t.Optional[DataObject]: + def get_data_object( + self, target_name: TableName, safe_to_cache: bool = False + ) -> t.Optional[DataObject]: target_table = exp.to_table(target_name) if isinstance(target_table.this, exp.Dot) and target_table.this.expression.name.startswith( f"{self.BRANCH_PREFIX}{self.WAP_PREFIX}" ): # Exclude the branch name target_table.set("this", target_table.this.this) - return super().get_data_object(target_table) + return super().get_data_object(target_table, safe_to_cache=safe_to_cache) def create_state_table( self, diff --git a/sqlmesh/core/snapshot/evaluator.py b/sqlmesh/core/snapshot/evaluator.py index 4ac87199c6..1483bdeece 100644 --- a/sqlmesh/core/snapshot/evaluator.py +++ b/sqlmesh/core/snapshot/evaluator.py @@ -307,6 +307,9 @@ def promote( ] self._create_schemas(gateway_table_pairs=gateway_table_pairs) + # Fetch the view data objects for the promoted snapshots to get them cached + self._get_virtual_data_objects(target_snapshots, environment_naming_info) + deployability_index = deployability_index or DeployabilityIndex.all_deployable() with self.concurrent_context(): concurrent_apply_to_snapshots( @@ -425,7 +428,9 @@ def get_snapshots_to_create( target_snapshots: Target snapshots. deployability_index: Determines snapshots that are deployable / representative in the context of this creation. """ - existing_data_objects = self._get_data_objects(target_snapshots, deployability_index) + existing_data_objects = self._get_physical_data_objects( + target_snapshots, deployability_index + ) snapshots_to_create = [] for snapshot in target_snapshots: if not snapshot.is_model or snapshot.is_symbolic: @@ -482,7 +487,7 @@ def migrate( deployability_index: Determines snapshots that are deployable in the context of this evaluation. """ deployability_index = deployability_index or DeployabilityIndex.all_deployable() - target_data_objects = self._get_data_objects(target_snapshots, deployability_index) + target_data_objects = self._get_physical_data_objects(target_snapshots, deployability_index) if not target_data_objects: return @@ -1472,7 +1477,7 @@ def _can_clone(self, snapshot: Snapshot, deployability_index: DeployabilityIndex and adapter.table_exists(snapshot.table_name()) ) - def _get_data_objects( + def _get_physical_data_objects( self, target_snapshots: t.Iterable[Snapshot], deployability_index: DeployabilityIndex, @@ -1488,6 +1493,59 @@ def _get_data_objects( A dictionary of snapshot IDs to existing data objects of their physical tables. If the data object for a snapshot is not found, it will not be included in the dictionary. """ + return self._get_data_objects( + target_snapshots, + lambda s: exp.to_table( + s.table_name(deployability_index.is_deployable(s)), dialect=s.model.dialect + ), + ) + + def _get_virtual_data_objects( + self, + target_snapshots: t.Iterable[Snapshot], + environment_naming_info: EnvironmentNamingInfo, + ) -> t.Dict[SnapshotId, DataObject]: + """Returns a dictionary of snapshot IDs to existing data objects of their virtual views. + + Args: + target_snapshots: Target snapshots. + environment_naming_info: The environment naming info of the target virtual environment. 
+ + Returns: + A dictionary of snapshot IDs to existing data objects of their virtual views. If the data object + for a snapshot is not found, it will not be included in the dictionary. + """ + + def _get_view_name(s: Snapshot) -> exp.Table: + adapter = ( + self.get_adapter(s.model_gateway) + if environment_naming_info.gateway_managed + else self.adapter + ) + return exp.to_table( + s.qualified_view_name.for_environment( + environment_naming_info, dialect=adapter.dialect + ), + dialect=adapter.dialect, + ) + + return self._get_data_objects(target_snapshots, _get_view_name) + + def _get_data_objects( + self, + target_snapshots: t.Iterable[Snapshot], + table_name_callable: t.Callable[[Snapshot], exp.Table], + ) -> t.Dict[SnapshotId, DataObject]: + """Returns a dictionary of snapshot IDs to existing data objects. + + Args: + target_snapshots: Target snapshots. + table_name_callable: A function that takes a snapshot and returns the table to look for. + + Returns: + A dictionary of snapshot IDs to existing data objects. If the data object for a snapshot is not found, + it will not be included in the dictionary. + """ tables_by_gateway_and_schema: t.Dict[t.Union[str, None], t.Dict[exp.Table, set[str]]] = ( defaultdict(lambda: defaultdict(set)) ) @@ -1495,8 +1553,7 @@ def _get_data_objects( for snapshot in target_snapshots: if not snapshot.is_model or snapshot.is_symbolic: continue - is_deployable = deployability_index.is_deployable(snapshot) - table = exp.to_table(snapshot.table_name(is_deployable), dialect=snapshot.model.dialect) + table = table_name_callable(snapshot) table_schema = d.schema_(table.db, catalog=table.catalog) tables_by_gateway_and_schema[snapshot.model_gateway][table_schema].add(table.name) snapshots_by_table_name[table.name] = snapshot @@ -1507,7 +1564,9 @@ def _get_data_objects_in_schema( gateway: t.Optional[str] = None, ) -> t.List[DataObject]: logger.info("Listing data objects in schema %s", schema.sql()) - return self.get_adapter(gateway).get_data_objects(schema, object_names) + return self.get_adapter(gateway).get_data_objects( + schema, object_names, safe_to_cache=True + ) with self.concurrent_context(): existing_objects: t.List[DataObject] = [] diff --git a/tests/core/engine_adapter/test_athena.py b/tests/core/engine_adapter/test_athena.py index 4fe57baf34..66e84ae025 100644 --- a/tests/core/engine_adapter/test_athena.py +++ b/tests/core/engine_adapter/test_athena.py @@ -312,6 +312,7 @@ def test_replace_query(adapter: AthenaEngineAdapter, mocker: MockerFixture): ) mocker.patch.object(adapter, "_get_data_objects", return_value=[]) adapter.cursor.execute.reset_mock() + adapter._clear_data_object_cache() adapter.s3_warehouse_location = "s3://foo" adapter.replace_query( diff --git a/tests/core/engine_adapter/test_base.py b/tests/core/engine_adapter/test_base.py index 140fac43eb..ba775c0779 100644 --- a/tests/core/engine_adapter/test_base.py +++ b/tests/core/engine_adapter/test_base.py @@ -3695,3 +3695,373 @@ def test_casted_columns( assert [ x.sql() for x in EngineAdapter._casted_columns(columns_to_types, source_columns) ] == expected + + +def test_data_object_cache_get_data_objects( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table1 = DataObject(catalog=None, schema="test_schema", name="table1", type="table") + table2 = DataObject(catalog=None, schema="test_schema", name="table2", type="table") + + mock_get_data_objects = mocker.patch.object( + adapter, 
"_get_data_objects", return_value=[table1, table2] + ) + + result1 = adapter.get_data_objects("test_schema", {"table1", "table2"}, safe_to_cache=True) + assert len(result1) == 2 + assert mock_get_data_objects.call_count == 1 + + result2 = adapter.get_data_objects("test_schema", {"table1", "table2"}, safe_to_cache=True) + assert len(result2) == 2 + assert mock_get_data_objects.call_count == 1 # Should not increase + + result3 = adapter.get_data_objects("test_schema", {"table1"}) + assert len(result3) == 1 + assert result3[0].name == "table1" + assert mock_get_data_objects.call_count == 1 # Should not increase + + +def test_data_object_cache_get_data_objects_bypasses_cache( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table1 = DataObject(catalog=None, schema="test_schema", name="table1", type="table") + table2 = DataObject(catalog=None, schema="test_schema", name="table2", type="table") + + mock_get_data_objects = mocker.patch.object( + adapter, "_get_data_objects", return_value=[table1, table2] + ) + + assert adapter.get_data_objects("test_schema") + assert adapter.get_data_objects("test_schema", {"table1", "table2"}) + assert adapter.get_data_objects("test_schema", {"table1", "table2"}) + assert adapter.get_data_objects("test_schema", {"table1"}) + assert adapter.get_data_object("test_schema.table1") is not None + + mock_get_data_objects.return_value = [] + assert not adapter.get_data_objects("test_schema") + assert not adapter.get_data_objects("test_schema", {"missing"}) + assert not adapter.get_data_objects("test_schema", {"missing"}) + assert adapter.get_data_object("test_schema.missing") is None + + # None of the calls should've been cached + assert mock_get_data_objects.call_count == 9 + assert not adapter._data_object_cache + + +def test_data_object_cache_get_data_objects_no_object_names( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table1 = DataObject(catalog=None, schema="test_schema", name="table1", type="table") + table2 = DataObject(catalog=None, schema="test_schema", name="table2", type="table") + + mock_get_data_objects = mocker.patch.object( + adapter, "_get_data_objects", return_value=[table1, table2] + ) + + result1 = adapter.get_data_objects("test_schema", safe_to_cache=True) + assert len(result1) == 2 + assert mock_get_data_objects.call_count == 1 + + result2 = adapter.get_data_objects("test_schema", {"table1", "table2"}, safe_to_cache=True) + assert len(result2) == 2 + assert mock_get_data_objects.call_count == 1 # Should not increase + + +def test_data_object_cache_get_data_object( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table = DataObject(catalog=None, schema="test_schema", name="test_table", type="table") + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[table]) + + result1 = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result1 is not None + assert result1.name == "test_table" + assert mock_get_data_objects.call_count == 1 + + result2 = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result2 is not None + assert result2.name == "test_table" + assert mock_get_data_objects.call_count == 1 # Should not increase + + +def 
test_data_object_cache_cleared_on_drop_table( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table = DataObject(catalog=None, schema="test_schema", name="test_table", type="table") + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[table]) + + adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert mock_get_data_objects.call_count == 1 + + adapter.drop_table("test_schema.test_table") + + mock_get_data_objects.return_value = [] + result = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 2 + + +def test_data_object_cache_cleared_on_drop_view( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + view = DataObject(catalog=None, schema="test_schema", name="test_view", type="view") + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[view]) + + adapter.get_data_object("test_schema.test_view", safe_to_cache=True) + assert mock_get_data_objects.call_count == 1 + + adapter.drop_view("test_schema.test_view") + + mock_get_data_objects.return_value = [] + result = adapter.get_data_object("test_schema.test_view", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 2 + + +def test_data_object_cache_cleared_on_drop_data_object( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table = DataObject(catalog=None, schema="test_schema", name="test_table", type="table") + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[table]) + + adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert mock_get_data_objects.call_count == 1 + + adapter.drop_data_object(table) + + mock_get_data_objects.return_value = [] + result = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 2 + + +def test_data_object_cache_cleared_on_create_table( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + from sqlglot import exp + + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + # Initially cache that table doesn't exist + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[]) + result = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 1 + + # Create the table + table = DataObject(catalog=None, schema="test_schema", name="test_table", type="table") + mock_get_data_objects.return_value = [table] + adapter.create_table( + "test_schema.test_table", + {"col1": exp.DataType.build("INT")}, + ) + + # Cache should be cleared, so next get_data_object should call _get_data_objects again + result = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result is not None + assert mock_get_data_objects.call_count == 2 + + +def test_data_object_cache_cleared_on_create_view( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + from sqlglot import parse_one + + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + # 
Initially cache that view doesn't exist + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[]) + result = adapter.get_data_object("test_schema.test_view", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 1 + + # Create the view + view = DataObject(catalog=None, schema="test_schema", name="test_view", type="view") + mock_get_data_objects.return_value = [view] + adapter.create_view( + "test_schema.test_view", + parse_one("SELECT 1 AS col1"), + ) + + # Cache should be cleared, so next get_data_object should call _get_data_objects again + result = adapter.get_data_object("test_schema.test_view", safe_to_cache=True) + assert result is not None + assert mock_get_data_objects.call_count == 2 + + +def test_data_object_cache_cleared_on_clone_table( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + from sqlmesh.core.engine_adapter.snowflake import SnowflakeEngineAdapter + + adapter = make_mocked_engine_adapter( + SnowflakeEngineAdapter, patch_get_data_objects=False, default_catalog="test_catalog" + ) + + # Initially cache that target table doesn't exist + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[]) + result = adapter.get_data_object("test_schema.test_target", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 1 + + # Clone the table + target_table = DataObject( + catalog="test_catalog", schema="test_schema", name="test_target", type="table" + ) + mock_get_data_objects.return_value = [target_table] + adapter.clone_table("test_schema.test_target", "test_schema.test_source") + + # Cache should be cleared, so next get_data_object should call _get_data_objects again + result = adapter.get_data_object("test_schema.test_target", safe_to_cache=True) + assert result is not None + assert mock_get_data_objects.call_count == 2 + + +def test_data_object_cache_with_catalog( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + from sqlmesh.core.engine_adapter.snowflake import SnowflakeEngineAdapter + + adapter = make_mocked_engine_adapter( + SnowflakeEngineAdapter, patch_get_data_objects=False, default_catalog="test_catalog" + ) + + table = DataObject( + catalog="test_catalog", schema="test_schema", name="test_table", type="table" + ) + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[table]) + + result1 = adapter.get_data_object("test_catalog.test_schema.test_table", safe_to_cache=True) + assert result1 is not None + assert result1.catalog == "test_catalog" + assert mock_get_data_objects.call_count == 1 + + result2 = adapter.get_data_object("test_catalog.test_schema.test_table", safe_to_cache=True) + assert result2 is not None + assert result2.catalog == "test_catalog" + assert mock_get_data_objects.call_count == 1 # Should not increase + + +def test_data_object_cache_partial_cache_hit( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table1 = DataObject(catalog=None, schema="test_schema", name="table1", type="table") + table2 = DataObject(catalog=None, schema="test_schema", name="table2", type="table") + table3 = DataObject(catalog=None, schema="test_schema", name="table3", type="table") + + mock_get_data_objects = mocker.patch.object( + adapter, "_get_data_objects", return_value=[table1, table2] + ) + + adapter.get_data_objects("test_schema", {"table1", "table2"}, 
safe_to_cache=True) + assert mock_get_data_objects.call_count == 1 + + mock_get_data_objects.return_value = [table3] + result = adapter.get_data_objects("test_schema", {"table1", "table3"}, safe_to_cache=True) + + assert len(result) == 2 + assert {obj.name for obj in result} == {"table1", "table3"} + assert mock_get_data_objects.call_count == 2 # Called again for table3 + + +def test_data_object_cache_get_data_objects_missing_objects( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table1 = DataObject(catalog=None, schema="test_schema", name="table1", type="table") + table2 = DataObject(catalog=None, schema="test_schema", name="table2", type="table") + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[]) + + result1 = adapter.get_data_objects("test_schema", {"table1", "table2"}, safe_to_cache=True) + assert not result1 + assert mock_get_data_objects.call_count == 1 + + result2 = adapter.get_data_objects("test_schema", {"table1", "table2"}, safe_to_cache=True) + assert not result2 + assert mock_get_data_objects.call_count == 1 # Should not increase + + result3 = adapter.get_data_objects("test_schema", {"table1"}, safe_to_cache=True) + assert not result3 + assert mock_get_data_objects.call_count == 1 # Should not increase + + +def test_data_object_cache_cleared_on_rename_table( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + old_table = DataObject(catalog=None, schema="test_schema", name="old_table", type="table") + mock_get_data_objects = mocker.patch.object( + adapter, "_get_data_objects", return_value=[old_table] + ) + + result = adapter.get_data_object("test_schema.old_table", safe_to_cache=True) + assert result is not None + assert result.name == "old_table" + assert mock_get_data_objects.call_count == 1 + + new_table = DataObject(catalog=None, schema="test_schema", name="new_table", type="table") + mock_get_data_objects.return_value = [new_table] + adapter.rename_table("test_schema.old_table", "test_schema.new_table") + + mock_get_data_objects.return_value = [] + result = adapter.get_data_object("test_schema.old_table", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 2 + + mock_get_data_objects.return_value = [new_table] + result = adapter.get_data_object("test_schema.new_table", safe_to_cache=True) + assert result is not None + assert result.name == "new_table" + assert mock_get_data_objects.call_count == 3 + + +def test_data_object_cache_cleared_on_create_table_like( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + from sqlglot import exp + + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + columns_to_types = { + "col1": exp.DataType.build("INT"), + "col2": exp.DataType.build("TEXT"), + } + mocker.patch.object(adapter, "columns", return_value=columns_to_types) + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[]) + result = adapter.get_data_object("test_schema.target_table", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 1 + + target_table = DataObject(catalog=None, schema="test_schema", name="target_table", type="table") + mock_get_data_objects.return_value = [target_table] + adapter.create_table_like("test_schema.target_table", 
"test_schema.source_table") + + result = adapter.get_data_object("test_schema.target_table", safe_to_cache=True) + assert result is not None + assert result.name == "target_table" + assert mock_get_data_objects.call_count == 2 diff --git a/tests/core/engine_adapter/test_snowflake.py b/tests/core/engine_adapter/test_snowflake.py index 62c4a4f3eb..ce4d3a886c 100644 --- a/tests/core/engine_adapter/test_snowflake.py +++ b/tests/core/engine_adapter/test_snowflake.py @@ -358,12 +358,12 @@ def test_create_managed_table(make_mocked_engine_adapter: t.Callable, mocker: Mo def test_drop_managed_table(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) - adapter.drop_managed_table(table_name=exp.parse_identifier("foo"), exists=False) - adapter.drop_managed_table(table_name=exp.parse_identifier("foo"), exists=True) + adapter.drop_managed_table(table_name="foo.bar", exists=False) + adapter.drop_managed_table(table_name="foo.bar", exists=True) assert to_sql_calls(adapter) == [ - 'DROP DYNAMIC TABLE "foo"', - 'DROP DYNAMIC TABLE IF EXISTS "foo"', + 'DROP DYNAMIC TABLE "foo"."bar"', + 'DROP DYNAMIC TABLE IF EXISTS "foo"."bar"', ] diff --git a/tests/core/test_snapshot_evaluator.py b/tests/core/test_snapshot_evaluator.py index 2df91afb10..19685e81c3 100644 --- a/tests/core/test_snapshot_evaluator.py +++ b/tests/core/test_snapshot_evaluator.py @@ -888,6 +888,7 @@ def test_create_prod_table_exists(mocker: MockerFixture, adapter_mock, make_snap { f"test_schema__test_model__{snapshot.version}", }, + safe_to_cache=True, ) @@ -974,6 +975,7 @@ def test_create_only_dev_table_exists(mocker: MockerFixture, adapter_mock, make_ { f"test_schema__test_model__{snapshot.version}__dev", }, + safe_to_cache=True, ) @@ -1023,6 +1025,7 @@ def test_create_new_forward_only_model(mocker: MockerFixture, adapter_mock, make { f"test_schema__test_model__{snapshot.dev_version}__dev", }, + safe_to_cache=True, ) @@ -1113,6 +1116,7 @@ def test_create_tables_exist( adapter_mock.get_data_objects.assert_called_once_with( schema_("sqlmesh__db"), {table_name}, + safe_to_cache=True, ) adapter_mock.create_schema.assert_not_called() adapter_mock.create_table.assert_not_called() @@ -1150,6 +1154,7 @@ def test_create_prod_table_exists_forward_only(mocker: MockerFixture, adapter_mo { f"test_schema__test_model__{snapshot.version}", }, + safe_to_cache=True, ) adapter_mock.create_table.assert_not_called() @@ -1341,9 +1346,11 @@ def test_promote_deployable(mocker: MockerFixture, make_snapshot): { f"test_schema__test_model__{snapshot.version}", }, + safe_to_cache=True, ) adapter_mock.create_table.assert_not_called() + adapter_mock.get_data_objects.return_value = [] evaluator.promote([snapshot], EnvironmentNamingInfo(name="test_env")) adapter_mock.create_schema.assert_called_once_with(to_schema("test_schema__test_env")) @@ -4188,6 +4195,7 @@ def test_multiple_engine_promotion(mocker: MockerFixture, adapter_mock, make_sna connection_mock.cursor.return_value = cursor_mock adapter = EngineAdapter(lambda: connection_mock, "") adapter.with_settings = lambda **kwargs: adapter # type: ignore + adapter._get_data_objects = lambda *args, **kwargs: [] # type: ignore engine_adapters = {"default": adapter_mock, "secondary": adapter} def columns(table_name): diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py index 9a9ce8f906..a33e3ed843 100644 --- a/tests/dbt/test_transformation.py +++ b/tests/dbt/test_transformation.py @@ -122,10 +122,10 @@ def 
test_dbt_custom_materialization(): selected_model = list(plan.selected_models)[0] assert selected_model == "model.sushi.custom_incremental_model" - qoery = "SELECT * FROM sushi.custom_incremental_model ORDER BY created_at" + query = "SELECT * FROM sushi.custom_incremental_model ORDER BY created_at" hook_table = "SELECT * FROM hook_table ORDER BY id" sushi_context.apply(plan) - result = sushi_context.engine_adapter.fetchdf(qoery) + result = sushi_context.engine_adapter.fetchdf(query) assert len(result) == 1 assert {"created_at", "id"}.issubset(result.columns) @@ -140,7 +140,7 @@ def test_dbt_custom_materialization(): tomorrow = datetime.now() + timedelta(days=1) sushi_context.run(select_models=["sushi.custom_incremental_model"], execution_time=tomorrow) - result_after_run = sushi_context.engine_adapter.fetchdf(qoery) + result_after_run = sushi_context.engine_adapter.fetchdf(query) assert {"created_at", "id"}.issubset(result_after_run.columns) # this should have added new unique values for the new row From 858f4320549fed120699e3abb0bd21d16f9a3c1d Mon Sep 17 00:00:00 2001 From: Max Mykal Date: Thu, 2 Oct 2025 12:59:28 -0700 Subject: [PATCH 041/173] feat(web_common):add option to toggle node dragging in lineage (#5473) --- .../src/components/Lineage/LineageLayout.tsx | 60 ++++++++++++++++-- .../Lineage/stories/Lineage.stories.tsx | 63 ++++++++++++++----- .../Lineage/stories/ModelLineage.tsx | 11 +++- 3 files changed, 111 insertions(+), 23 deletions(-) diff --git a/web/common/src/components/Lineage/LineageLayout.tsx b/web/common/src/components/Lineage/LineageLayout.tsx index 411ace4e65..4b2e06b0b3 100644 --- a/web/common/src/components/Lineage/LineageLayout.tsx +++ b/web/common/src/components/Lineage/LineageLayout.tsx @@ -2,7 +2,9 @@ import { Background, BackgroundVariant, Controls, + type EdgeChange, type EdgeTypes, + type NodeChange, type NodeTypes, ReactFlow, ReactFlowProvider, @@ -12,6 +14,8 @@ import { getOutgoers, useReactFlow, useViewport, + applyNodeChanges, + applyEdgeChanges, } from '@xyflow/react' import '@xyflow/react/dist/style.css' @@ -55,6 +59,8 @@ export function LineageLayout< edgeTypes, className, controls, + nodesDraggable, + nodesConnectable, useLineage, onNodeClick, onNodeDoubleClick, @@ -69,6 +75,8 @@ export function LineageLayout< nodeTypes?: NodeTypes edgeTypes?: EdgeTypes className?: string + nodesDraggable?: boolean + nodesConnectable?: boolean controls?: | React.ReactNode | (({ setCenter }: { setCenter: SetCenter }) => React.ReactNode) @@ -86,6 +94,8 @@ export function LineageLayout< + nodesDraggable?: boolean + nodesConnectable?: boolean nodeTypes?: NodeTypes edgeTypes?: EdgeTypes className?: string @@ -140,8 +154,8 @@ function LineageLayoutBase< isBuildingLayout, currentNode, zoom, - nodes, - edges, + nodes: initialNodes, + edges: initialEdges, nodesMap, showOnlySelectedNodes, selectedNodeId, @@ -152,6 +166,32 @@ function LineageLayoutBase< setSelectedEdges, } = useLineage() + const [nodes, setNodes] = React.useState(initialNodes) + const [edges, setEdges] = React.useState(initialEdges) + + const onNodesChange = React.useCallback( + (changes: NodeChange>[]) => { + setNodes( + applyNodeChanges>(changes, nodes), + ) + }, + [nodes, setNodes], + ) + + const onEdgesChange = React.useCallback( + ( + changes: EdgeChange>[], + ) => { + setEdges( + applyEdgeChanges>( + changes, + edges, + ), + ) + }, + [edges, setEdges], + ) + const updateZoom = React.useMemo(() => debounce(setZoom, 200), [setZoom]) const zoomToCurrentNode = React.useCallback( @@ -221,6 +261,14 @@ function 
LineageLayoutBase< [nodes, edges], ) + React.useEffect(() => { + setNodes(initialNodes) + }, [initialNodes]) + + React.useEffect(() => { + setEdges(initialEdges) + }, [initialEdges]) + React.useEffect(() => { if (selectedNodeId == null) { setShowOnlySelectedNodes(false) @@ -290,8 +338,6 @@ function LineageLayoutBase< React.useEffect(() => { if (currentNode?.id) { setSelectedNodeId(currentNode.id) - } else if (selectedNodeId) { - // setSelectedNodeId(selectedNodeId); } else { const node = nodes.length > 0 ? nodes[nodes.length - 1] : null @@ -332,8 +378,10 @@ function LineageLayoutBase< edges={edges} nodeTypes={nodeTypes} edgeTypes={edgeTypes} - nodesDraggable={false} - nodesConnectable={false} + onNodesChange={onNodesChange} + onEdgesChange={onEdgesChange} + nodesDraggable={nodesDraggable} + nodesConnectable={nodesConnectable} zoomOnDoubleClick={false} panOnScroll={true} zoomOnScroll={true} diff --git a/web/common/src/components/Lineage/stories/Lineage.stories.tsx b/web/common/src/components/Lineage/stories/Lineage.stories.tsx index 4ad8ca9f8b..6e16bed61e 100644 --- a/web/common/src/components/Lineage/stories/Lineage.stories.tsx +++ b/web/common/src/components/Lineage/stories/Lineage.stories.tsx @@ -17,33 +17,56 @@ export const LineageModel = () => { > { const [zoom, setZoom] = React.useState(ZOOM_THRESHOLD) const [isBuildingLayout, setIsBuildingLayout] = React.useState(false) + const [nodesDraggable, setNodesDraggable] = React.useState(false) const [edges, setEdges] = React.useState< LineageEdge[] >([]) @@ -388,6 +389,7 @@ export const ModelLineage = ({ nodeTypes={nodeTypes} edgeTypes={edgeTypes} className={className} + nodesDraggable={nodesDraggable} controls={ <> + setNodesDraggable(prev => !prev)} + disabled={isBuildingLayout} + > + + } /> From cbab32cc3a4bc54c0a4fb3f9e17853a42d54e5c7 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Fri, 3 Oct 2025 01:20:32 +0300 Subject: [PATCH 042/173] Chore: make `.sqlmesh` location configurable (#5474) --- docs/guides/configuration.md | 3 +++ sqlmesh/core/constants.py | 2 +- tests/core/engine_adapter/integration/conftest.py | 3 ++- tests/core/engine_adapter/test_trino.py | 6 +++--- tests/dbt/test_adapter.py | 2 +- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index d2e294a589..d6d4f20c11 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -21,6 +21,9 @@ The sources have the following order of precedence: 2. `config.yaml` or `config.py` in the `~/.sqlmesh` folder. 3. `config.yaml` or `config.py` in a project folder. [LOWEST PRECEDENCE] +!!! note + To relocate the `.sqlmesh` folder, set the `SQLMESH_HOME` environment variable to your preferred directory path. + ### File type You can specify a SQLMesh configuration in either YAML or Python. 
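Note: a quick illustration of the SQLMESH_HOME resolution described in the docs note above and implemented by the one-line change to sqlmesh/core/constants.py below. resolve_sqlmesh_path and the /opt path are made up for the example; because of the `or`, an empty value also falls back to the default:

import os
from pathlib import Path

def resolve_sqlmesh_path() -> Path:
    # Same expression as the new SQLMESH_PATH assignment in constants.py.
    return Path(os.getenv("SQLMESH_HOME") or Path.home() / ".sqlmesh")

os.environ["SQLMESH_HOME"] = "/opt/shared/sqlmesh"  # hypothetical location
assert resolve_sqlmesh_path() == Path("/opt/shared/sqlmesh")

os.environ.pop("SQLMESH_HOME")
assert resolve_sqlmesh_path() == Path.home() / ".sqlmesh"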
diff --git a/sqlmesh/core/constants.py b/sqlmesh/core/constants.py index a1d117f4fb..66dadb0b5d 100644 --- a/sqlmesh/core/constants.py +++ b/sqlmesh/core/constants.py @@ -8,7 +8,7 @@ SQLMESH = "sqlmesh" SQLMESH_MANAGED = "sqlmesh_managed" -SQLMESH_PATH = Path.home() / ".sqlmesh" +SQLMESH_PATH = Path(os.getenv("SQLMESH_HOME") or Path.home() / ".sqlmesh") PROD = "prod" """Prod""" diff --git a/tests/core/engine_adapter/integration/conftest.py b/tests/core/engine_adapter/integration/conftest.py index 30f934da63..308819b671 100644 --- a/tests/core/engine_adapter/integration/conftest.py +++ b/tests/core/engine_adapter/integration/conftest.py @@ -9,6 +9,7 @@ from sqlmesh import Config, EngineAdapter +from sqlmesh.core.constants import SQLMESH_PATH from sqlmesh.core.config.connection import ( ConnectionConfig, AthenaConnectionConfig, @@ -34,7 +35,7 @@ def config(tmp_path: pathlib.Path) -> Config: project_paths=[ pathlib.Path(os.path.join(os.path.dirname(__file__), "config.yaml")), ], - personal_paths=[pathlib.Path("~/.sqlmesh/config.yaml").expanduser()], + personal_paths=[(SQLMESH_PATH / "config.yaml").expanduser()], variables={"tmp_path": str(tmp_path)}, ) diff --git a/tests/core/engine_adapter/test_trino.py b/tests/core/engine_adapter/test_trino.py index 526cb05b04..bf925c875a 100644 --- a/tests/core/engine_adapter/test_trino.py +++ b/tests/core/engine_adapter/test_trino.py @@ -669,7 +669,7 @@ def test_replace_table_catalog_support( adapter.replace_query( table_name=".".join([catalog_name, "schema", "test_table"]), - query_or_df=parse_one("SELECT 1 AS col"), + query_or_df=t.cast(exp.Query, parse_one("SELECT 1 AS col")), ) sql_calls = to_sql_calls(adapter) @@ -705,7 +705,7 @@ def test_insert_overwrite_time_partition_hive( adapter.insert_overwrite_by_time_partition( table_name=".".join(["my_catalog", "schema", "test_table"]), - query_or_df=parse_one("SELECT a, b FROM tbl"), + query_or_df=t.cast(exp.Query, parse_one("SELECT a, b FROM tbl")), start="2022-01-01", end="2022-01-02", time_column="b", @@ -743,7 +743,7 @@ def test_insert_overwrite_time_partition_iceberg( adapter.insert_overwrite_by_time_partition( table_name=".".join(["my_catalog", "schema", "test_table"]), - query_or_df=parse_one("SELECT a, b FROM tbl"), + query_or_df=t.cast(exp.Query, parse_one("SELECT a, b FROM tbl")), start="2022-01-01", end="2022-01-02", time_column="b", diff --git a/tests/dbt/test_adapter.py b/tests/dbt/test_adapter.py index 381401ce73..5570212668 100644 --- a/tests/dbt/test_adapter.py +++ b/tests/dbt/test_adapter.py @@ -39,7 +39,7 @@ def test_adapter_relation(sushi_test_project: Project, runtime_renderer: t.Calla table_name="foo.another", target_columns_to_types={"col": exp.DataType.build("int")} ) engine_adapter.create_view( - view_name="foo.bar_view", query_or_df=parse_one("select * from foo.bar") + view_name="foo.bar_view", query_or_df=t.cast(exp.Query, parse_one("select * from foo.bar")) ) engine_adapter.create_table( table_name="ignored.ignore", target_columns_to_types={"col": exp.DataType.build("int")} From 26bba970d6d12f9ebf2c7802ad2693bdf8700b52 Mon Sep 17 00:00:00 2001 From: Max Mykal Date: Thu, 2 Oct 2025 16:42:43 -0700 Subject: [PATCH 043/173] fix(web_common): adjust types in lineage component (#5476) --- web/common/src/components/Lineage/help.ts | 4 +++- web/common/src/components/Lineage/index.ts | 1 + .../components/Lineage/stories/ModelNode.tsx | 4 ++-- .../Lineage/stories/dagreLayout.worker.ts | 24 +++++++++++++++---- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git 
a/web/common/src/components/Lineage/help.ts b/web/common/src/components/Lineage/help.ts index a052ff707b..1e5d5a9d6b 100644 --- a/web/common/src/components/Lineage/help.ts +++ b/web/common/src/components/Lineage/help.ts @@ -23,7 +23,9 @@ export function getOnlySelectedNodes< TNodeData extends LineageNodeData = LineageNodeData, TNodeID extends string = NodeId, >(nodeMaps: LineageNodesMap, selectedNodes: Set) { - return (Object.values(nodeMaps) as LineageNode[]).reduce( + return ( + Object.values(nodeMaps) satisfies LineageNode[] + ).reduce( (acc, node) => selectedNodes.has(node.id) ? { ...acc, [node.id]: node } : acc, {} as LineageNodesMap, diff --git a/web/common/src/components/Lineage/index.ts b/web/common/src/components/Lineage/index.ts index 0fbc17047c..4a0b6eccc7 100644 --- a/web/common/src/components/Lineage/index.ts +++ b/web/common/src/components/Lineage/index.ts @@ -21,6 +21,7 @@ export * from './node/useNodeMetadata' export * from './edge/EdgeWithGradient' export * from './edge/FactoryEdgeWithGradient' export * from './layout/dagreLayout' +export * from './layout/help' export * from './LineageColumnLevel/ColumnLevelLineageContext' export * from './LineageColumnLevel/FactoryColumn' export * from './LineageColumnLevel/useColumns' diff --git a/web/common/src/components/Lineage/stories/ModelNode.tsx b/web/common/src/components/Lineage/stories/ModelNode.tsx index 2f4705f1c1..b0bd2f7867 100644 --- a/web/common/src/components/Lineage/stories/ModelNode.tsx +++ b/web/common/src/components/Lineage/stories/ModelNode.tsx @@ -254,7 +254,7 @@ export const ModelNode = React.memo(function ModelNode({ className="p-1 first:border-t-0 h-6" columnLineageData={ ( - column as Column & { + column satisfies Column & { columnLineageData?: ColumnLevelLineageAdjacencyList< ModelNameType, ColumnName @@ -287,7 +287,7 @@ export const ModelNode = React.memo(function ModelNode({ className="p-1 border-t border-lineage-divider first:border-t-0 h-6" columnLineageData={ ( - column as Column & { + column satisfies Column & { columnLineageData?: ColumnLevelLineageAdjacencyList< ModelNameType, ColumnName diff --git a/web/common/src/components/Lineage/stories/dagreLayout.worker.ts b/web/common/src/components/Lineage/stories/dagreLayout.worker.ts index 1a6a9d3fe7..ce452f4808 100644 --- a/web/common/src/components/Lineage/stories/dagreLayout.worker.ts +++ b/web/common/src/components/Lineage/stories/dagreLayout.worker.ts @@ -2,23 +2,37 @@ import { type LayoutedGraph, type LineageEdgeData, type LineageNodeData, + type EdgeId, + type NodeId, + type PortId, } from '../utils' import { buildLayout } from '../layout/dagreLayout' self.onmessage = < TNodeData extends LineageNodeData = LineageNodeData, TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, >( - event: MessageEvent>, + event: MessageEvent< + LayoutedGraph + >, ) => { try { - const { edges, nodesMap } = buildLayout(event.data) + const { edges, nodesMap } = buildLayout< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TPortID + >(event.data) self.postMessage({ edges, nodesMap, - } as LayoutedGraph) - } catch (outerError) { - self.postMessage({ error: outerError } as { error: ErrorEvent }) + } satisfies LayoutedGraph) + } catch (error) { + self.postMessage({ error }) } } From 9ed67444bb4e7a662636fc9d0362c1b8cae7be7f Mon Sep 17 00:00:00 2001 From: David Dai Date: Thu, 2 Oct 2025 21:22:55 -0700 Subject: [PATCH 044/173] feat(experimental): add official support for model 
grants (#5275) Co-authored-by: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> --- .circleci/continue_config.yml | 2 +- .gitignore | 7 + sqlmesh/core/_typing.py | 1 + sqlmesh/core/engine_adapter/_typing.py | 2 + sqlmesh/core/engine_adapter/base.py | 147 +++ sqlmesh/core/engine_adapter/base_postgres.py | 8 + sqlmesh/core/engine_adapter/bigquery.py | 112 ++- sqlmesh/core/engine_adapter/databricks.py | 28 +- sqlmesh/core/engine_adapter/mixins.py | 143 ++- sqlmesh/core/engine_adapter/postgres.py | 6 + sqlmesh/core/engine_adapter/redshift.py | 4 + sqlmesh/core/engine_adapter/risingwave.py | 1 + sqlmesh/core/engine_adapter/snowflake.py | 59 +- sqlmesh/core/engine_adapter/spark.py | 4 +- sqlmesh/core/model/common.py | 1 + sqlmesh/core/model/definition.py | 29 + sqlmesh/core/model/kind.py | 5 + sqlmesh/core/model/meta.py | 101 ++ sqlmesh/core/snapshot/evaluator.py | 200 +++- sqlmesh/dbt/basemodel.py | 6 +- sqlmesh/dbt/model.py | 6 + ...0100_add_grants_and_grants_target_layer.py | 9 + .../engine_adapter/integration/__init__.py | 102 ++ .../integration/test_integration.py | 206 ++++ .../integration/test_integration_postgres.py | 938 ++++++++++++++++++ tests/core/engine_adapter/test_base.py | 105 ++ .../core/engine_adapter/test_base_postgres.py | 24 + tests/core/engine_adapter/test_bigquery.py | 181 +++- tests/core/engine_adapter/test_databricks.py | 181 +++- tests/core/engine_adapter/test_postgres.py | 105 ++ tests/core/engine_adapter/test_redshift.py | 150 ++- tests/core/engine_adapter/test_snowflake.py | 199 ++++ tests/core/engine_adapter/test_spark.py | 4 +- tests/core/test_context.py | 59 +- tests/core/test_model.py | 346 ++++++- tests/core/test_snapshot.py | 73 +- tests/core/test_snapshot_evaluator.py | 539 +++++++++- tests/dbt/test_model.py | 177 +++- 38 files changed, 4229 insertions(+), 41 deletions(-) create mode 100644 sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index c549c0ae78..c4b7bcbd53 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -148,7 +148,7 @@ jobs: command: ./.circleci/test_migration.sh sushi "--gateway duckdb_persistent" - run: name: Run the migration test - sushi_dbt - command: ./.circleci/test_migration.sh sushi_dbt "--config migration_test_config" + command: ./.circleci/test_migration.sh sushi_dbt "--config migration_test_config" ui_style: docker: diff --git a/.gitignore b/.gitignore index 72b41b5ce1..16593984dd 100644 --- a/.gitignore +++ b/.gitignore @@ -138,6 +138,12 @@ dmypy.json *~ *# +# Vim +*.swp +*.swo +.null-ls* + + *.duckdb *.duckdb.wal @@ -158,3 +164,4 @@ spark-warehouse/ # claude .claude/ + diff --git a/sqlmesh/core/_typing.py b/sqlmesh/core/_typing.py index e495df169e..8e28312c1a 100644 --- a/sqlmesh/core/_typing.py +++ b/sqlmesh/core/_typing.py @@ -11,6 +11,7 @@ SessionProperties = t.Dict[str, t.Union[exp.Expression, str, int, float, bool]] CustomMaterializationProperties = t.Dict[str, t.Union[exp.Expression, str, int, float, bool]] + if sys.version_info >= (3, 11): from typing import Self as Self else: diff --git a/sqlmesh/core/engine_adapter/_typing.py b/sqlmesh/core/engine_adapter/_typing.py index 98821bb2d4..77bcf2c015 100644 --- a/sqlmesh/core/engine_adapter/_typing.py +++ b/sqlmesh/core/engine_adapter/_typing.py @@ -30,3 +30,5 @@ ] QueryOrDF = t.Union[Query, DF] + GrantsConfig = t.Dict[str, t.List[str]] + DCL = t.TypeVar("DCL", exp.Grant, exp.Revoke) diff --git a/sqlmesh/core/engine_adapter/base.py 
b/sqlmesh/core/engine_adapter/base.py index d9cc4f44a2..ebbf136cd1 100644 --- a/sqlmesh/core/engine_adapter/base.py +++ b/sqlmesh/core/engine_adapter/base.py @@ -63,6 +63,7 @@ from sqlmesh.core.engine_adapter._typing import ( DF, BigframeSession, + GrantsConfig, PySparkDataFrame, PySparkSession, Query, @@ -114,6 +115,7 @@ class EngineAdapter: SUPPORTS_TUPLE_IN = True HAS_VIEW_BINDING = False SUPPORTS_REPLACE_TABLE = True + SUPPORTS_GRANTS = False DEFAULT_CATALOG_TYPE = DIALECT QUOTE_IDENTIFIERS_IN_VIEWS = True MAX_IDENTIFIER_LENGTH: t.Optional[int] = None @@ -2478,6 +2480,33 @@ def wap_publish(self, table_name: TableName, wap_id: str) -> None: """ raise NotImplementedError(f"Engine does not support WAP: {type(self)}") + def sync_grants_config( + self, + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> None: + """Applies the grants_config to a table authoritatively. + It first compares the specified grants against the current grants, and then + applies the diffs to the table by revoking and granting privileges as needed. + + Args: + table: The table/view to apply grants to. + grants_config: Dictionary mapping privileges to lists of grantees. + table_type: The type of database object (TABLE, VIEW, MATERIALIZED_VIEW). + """ + if not self.SUPPORTS_GRANTS: + raise NotImplementedError(f"Engine does not support grants: {type(self)}") + + current_grants = self._get_current_grants_config(table) + new_grants, revoked_grants = self._diff_grants_configs(grants_config, current_grants) + revoke_exprs = self._revoke_grants_config_expr(table, revoked_grants, table_type) + grant_exprs = self._apply_grants_config_expr(table, new_grants, table_type) + dcl_exprs = revoke_exprs + grant_exprs + + if dcl_exprs: + self.execute(dcl_exprs) + @contextlib.contextmanager def transaction( self, @@ -3029,6 +3058,124 @@ def _check_identifier_length(self, expression: exp.Expression) -> None: def get_table_last_modified_ts(self, table_names: t.List[TableName]) -> t.List[int]: raise NotImplementedError() + @classmethod + def _diff_grants_configs( + cls, new_config: GrantsConfig, old_config: GrantsConfig + ) -> t.Tuple[GrantsConfig, GrantsConfig]: + """Compute additions and removals between two grants configurations. + + This method compares new (desired) and old (current) GrantsConfigs case-insensitively + for both privilege keys and grantees, while preserving original casing + in the output GrantsConfigs. + + Args: + new_config: Desired grants configuration (specified by the user). + old_config: Current grants configuration (returned by the database). + + Returns: + A tuple of (additions, removals) GrantsConfig where: + - additions contains privileges/grantees present in new_config but not in old_config + - additions uses keys and grantee strings from new_config (user-specified casing) + - removals contains privileges/grantees present in old_config but not in new_config + - removals uses keys and grantee strings from old_config (database-returned casing) + + Notes: + - Comparison is case-insensitive using casefold(); original casing is preserved in results. + - Overlapping grantees (case-insensitive) are excluded from the results. 
+ """ + + def _diffs(config1: GrantsConfig, config2: GrantsConfig) -> GrantsConfig: + diffs: GrantsConfig = {} + cf_config2 = {k.casefold(): {g.casefold() for g in v} for k, v in config2.items()} + for key, grantees in config1.items(): + cf_key = key.casefold() + + # Missing key (add all grantees) + if cf_key not in cf_config2: + diffs[key] = grantees.copy() + continue + + # Include only grantees not in config2 + cf_grantees2 = cf_config2[cf_key] + diff_grantees = [] + for grantee in grantees: + if grantee.casefold() not in cf_grantees2: + diff_grantees.append(grantee) + if diff_grantees: + diffs[key] = diff_grantees + return diffs + + return _diffs(new_config, old_config), _diffs(old_config, new_config) + + def _get_current_grants_config(self, table: exp.Table) -> GrantsConfig: + """Returns current grants for a table as a dictionary. + + This method queries the database and returns the current grants/permissions + for the given table, parsed into a dictionary format. The it handles + case-insensitive comparison between these current grants and the desired + grants from model configuration. + + Args: + table: The table/view to query grants for. + + Returns: + Dictionary mapping permissions to lists of grantees. Permission names + should be returned as the database provides them (typically uppercase + for standard SQL permissions, but engine-specific roles may vary). + + Raises: + NotImplementedError: If the engine does not support grants. + """ + if not self.SUPPORTS_GRANTS: + raise NotImplementedError(f"Engine does not support grants: {type(self)}") + raise NotImplementedError("Subclass must implement get_current_grants") + + def _apply_grants_config_expr( + self, + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expression]: + """Returns SQLGlot Grant expressions to apply grants to a table. + + Args: + table: The table/view to grant permissions on. + grants_config: Dictionary mapping permissions to lists of grantees. + table_type: The type of database object (TABLE, VIEW, MATERIALIZED_VIEW). + + Returns: + List of SQLGlot expressions for grant operations. + + Raises: + NotImplementedError: If the engine does not support grants. + """ + if not self.SUPPORTS_GRANTS: + raise NotImplementedError(f"Engine does not support grants: {type(self)}") + raise NotImplementedError("Subclass must implement _apply_grants_config_expr") + + def _revoke_grants_config_expr( + self, + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expression]: + """Returns SQLGlot expressions to revoke grants from a table. + + Args: + table: The table/view to revoke permissions from. + grants_config: Dictionary mapping permissions to lists of grantees. + table_type: The type of database object (TABLE, VIEW, MATERIALIZED_VIEW). + + Returns: + List of SQLGlot expressions for revoke operations. + + Raises: + NotImplementedError: If the engine does not support grants. 
+ """ + if not self.SUPPORTS_GRANTS: + raise NotImplementedError(f"Engine does not support grants: {type(self)}") + raise NotImplementedError("Subclass must implement _revoke_grants_config_expr") + class EngineAdapterWithIndexSupport(EngineAdapter): SUPPORTS_INDEXES = True diff --git a/sqlmesh/core/engine_adapter/base_postgres.py b/sqlmesh/core/engine_adapter/base_postgres.py index 3de975d6a5..11f56da133 100644 --- a/sqlmesh/core/engine_adapter/base_postgres.py +++ b/sqlmesh/core/engine_adapter/base_postgres.py @@ -62,6 +62,7 @@ def columns( raise SQLMeshError( f"Could not get columns for table '{table.sql(dialect=self.dialect)}'. Table not found." ) + return { column_name: exp.DataType.build(data_type, dialect=self.dialect, udt=True) for column_name, data_type in resp @@ -196,3 +197,10 @@ def _get_data_objects( ) for row in df.itertuples() ] + + def _get_current_schema(self) -> str: + """Returns the current default schema for the connection.""" + result = self.fetchone(exp.select(exp.func("current_schema"))) + if result and result[0]: + return result[0] + return "public" diff --git a/sqlmesh/core/engine_adapter/bigquery.py b/sqlmesh/core/engine_adapter/bigquery.py index 09fd7537ef..59a56b6ace 100644 --- a/sqlmesh/core/engine_adapter/bigquery.py +++ b/sqlmesh/core/engine_adapter/bigquery.py @@ -11,6 +11,7 @@ from sqlmesh.core.engine_adapter.base import _get_data_object_cache_key from sqlmesh.core.engine_adapter.mixins import ( ClusteredByMixin, + GrantsFromInfoSchemaMixin, RowDiffMixin, TableAlterClusterByOperation, ) @@ -40,7 +41,7 @@ from google.cloud.bigquery.table import Table as BigQueryTable from sqlmesh.core._typing import SchemaName, SessionProperties, TableName - from sqlmesh.core.engine_adapter._typing import BigframeSession, DF, Query + from sqlmesh.core.engine_adapter._typing import BigframeSession, DCL, DF, GrantsConfig, Query from sqlmesh.core.engine_adapter.base import QueryOrDF @@ -55,7 +56,7 @@ @set_catalog() -class BigQueryEngineAdapter(ClusteredByMixin, RowDiffMixin): +class BigQueryEngineAdapter(ClusteredByMixin, RowDiffMixin, GrantsFromInfoSchemaMixin): """ BigQuery Engine Adapter using the `google-cloud-bigquery` library's DB API. 
""" @@ -65,6 +66,11 @@ class BigQueryEngineAdapter(ClusteredByMixin, RowDiffMixin): SUPPORTS_TRANSACTIONS = False SUPPORTS_MATERIALIZED_VIEWS = True SUPPORTS_CLONING = True + SUPPORTS_GRANTS = True + CURRENT_USER_OR_ROLE_EXPRESSION: exp.Expression = exp.func("session_user") + SUPPORTS_MULTIPLE_GRANT_PRINCIPALS = True + USE_CATALOG_IN_GRANTS = True + GRANT_INFORMATION_SCHEMA_TABLE_NAME = "OBJECT_PRIVILEGES" MAX_TABLE_COMMENT_LENGTH = 1024 MAX_COLUMN_COMMENT_LENGTH = 1024 SUPPORTS_QUERY_EXECUTION_TRACKING = True @@ -1326,6 +1332,108 @@ def _session_id(self) -> t.Any: def _session_id(self, value: t.Any) -> None: self._connection_pool.set_attribute("session_id", value) + def _get_current_schema(self) -> str: + raise NotImplementedError("BigQuery does not support current schema") + + def _get_bq_dataset_location(self, project: str, dataset: str) -> str: + return self._db_call(self.client.get_dataset, dataset_ref=f"{project}.{dataset}").location + + def _get_grant_expression(self, table: exp.Table) -> exp.Expression: + if not table.db: + raise ValueError( + f"Table {table.sql(dialect=self.dialect)} does not have a schema (dataset)" + ) + project = table.catalog or self.get_current_catalog() + if not project: + raise ValueError( + f"Table {table.sql(dialect=self.dialect)} does not have a catalog (project)" + ) + + dataset = table.db + table_name = table.name + location = self._get_bq_dataset_location(project, dataset) + + # https://cloud.google.com/bigquery/docs/information-schema-object-privileges + # OBJECT_PRIVILEGES is a project-level INFORMATION_SCHEMA view with regional qualifier + object_privileges_table = exp.to_table( + f"`{project}`.`region-{location}`.INFORMATION_SCHEMA.{self.GRANT_INFORMATION_SCHEMA_TABLE_NAME}", + dialect=self.dialect, + ) + return ( + exp.select("privilege_type", "grantee") + .from_(object_privileges_table) + .where( + exp.and_( + exp.column("object_schema").eq(exp.Literal.string(dataset)), + exp.column("object_name").eq(exp.Literal.string(table_name)), + # Filter out current_user + # BigQuery grantees format: "user:email" or "group:name" + exp.func("split", exp.column("grantee"), exp.Literal.string(":"))[ + exp.func("OFFSET", exp.Literal.number("1")) + ].neq(self.CURRENT_USER_OR_ROLE_EXPRESSION), + ) + ) + ) + + @staticmethod + def _grant_object_kind(table_type: DataObjectType) -> str: + if table_type == DataObjectType.VIEW: + return "VIEW" + if table_type == DataObjectType.MATERIALIZED_VIEW: + # We actually need to use "MATERIALIZED VIEW" here even though it's not listed + # as a supported resource_type in the BigQuery DCL doc: + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-control-language + return "MATERIALIZED VIEW" + return "TABLE" + + def _dcl_grants_config_expr( + self, + dcl_cmd: t.Type[DCL], + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expression]: + expressions: t.List[exp.Expression] = [] + if not grants_config: + return expressions + + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-control-language + + def normalize_principal(p: str) -> str: + if ":" not in p: + raise ValueError(f"Principal '{p}' missing a prefix label") + + # allUsers and allAuthenticatedUsers special groups that are cas-sensitive and must start with "specialGroup:" + if p.endswith("allUsers") or p.endswith("allAuthenticatedUsers"): + if not p.startswith("specialGroup:"): + raise ValueError( + f"Special group principal '{p}' must start with 'specialGroup:' prefix 
label" + ) + return p + + label, principal = p.split(":", 1) + # always lowercase principals + return f"{label}:{principal.lower()}" + + object_kind = self._grant_object_kind(table_type) + for privilege, principals in grants_config.items(): + if not principals: + continue + + noramlized_principals = [exp.Literal.string(normalize_principal(p)) for p in principals] + args: t.Dict[str, t.Any] = { + "privileges": [exp.GrantPrivilege(this=exp.to_identifier(privilege, quoted=True))], + "securable": table.copy(), + "principals": noramlized_principals, + } + + if object_kind: + args["kind"] = exp.Var(this=object_kind) + + expressions.append(dcl_cmd(**args)) # type: ignore[arg-type] + + return expressions + class _ErrorCounter: """ diff --git a/sqlmesh/core/engine_adapter/databricks.py b/sqlmesh/core/engine_adapter/databricks.py index 173e1b08af..7521124684 100644 --- a/sqlmesh/core/engine_adapter/databricks.py +++ b/sqlmesh/core/engine_adapter/databricks.py @@ -5,7 +5,9 @@ from functools import partial from sqlglot import exp + from sqlmesh.core.dialect import to_schema +from sqlmesh.core.engine_adapter.mixins import GrantsFromInfoSchemaMixin from sqlmesh.core.engine_adapter.shared import ( CatalogSupport, DataObject, @@ -28,12 +30,14 @@ logger = logging.getLogger(__name__) -class DatabricksEngineAdapter(SparkEngineAdapter): +class DatabricksEngineAdapter(SparkEngineAdapter, GrantsFromInfoSchemaMixin): DIALECT = "databricks" INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.REPLACE_WHERE SUPPORTS_CLONING = True SUPPORTS_MATERIALIZED_VIEWS = True SUPPORTS_MATERIALIZED_VIEW_SCHEMA = True + SUPPORTS_GRANTS = True + USE_CATALOG_IN_GRANTS = True # Spark has this set to false for compatibility when mixing with Trino but that isn't a concern with Databricks QUOTE_IDENTIFIERS_IN_VIEWS = True SCHEMA_DIFFER_KWARGS = { @@ -151,6 +155,28 @@ def spark(self) -> PySparkSession: def catalog_support(self) -> CatalogSupport: return CatalogSupport.FULL_SUPPORT + @staticmethod + def _grant_object_kind(table_type: DataObjectType) -> str: + if table_type == DataObjectType.VIEW: + return "VIEW" + if table_type == DataObjectType.MATERIALIZED_VIEW: + return "MATERIALIZED VIEW" + return "TABLE" + + def _get_grant_expression(self, table: exp.Table) -> exp.Expression: + # We only care about explicitly granted privileges and not inherited ones + # if this is removed you would see grants inherited from the catalog get returned + expression = super()._get_grant_expression(table) + expression.args["where"].set( + "this", + exp.and_( + expression.args["where"].this, + exp.column("inherited_from").eq(exp.Literal.string("NONE")), + wrap=False, + ), + ) + return expression + def _begin_session(self, properties: SessionProperties) -> t.Any: """Begin a new session.""" # Align the different possible connectors to a single catalog diff --git a/sqlmesh/core/engine_adapter/mixins.py b/sqlmesh/core/engine_adapter/mixins.py index 1d66da0607..c8ef32b9da 100644 --- a/sqlmesh/core/engine_adapter/mixins.py +++ b/sqlmesh/core/engine_adapter/mixins.py @@ -7,8 +7,10 @@ from sqlglot import exp, parse_one from sqlglot.helper import seq_get +from sqlglot.optimizer.normalize_identifiers import normalize_identifiers from sqlmesh.core.engine_adapter.base import EngineAdapter +from sqlmesh.core.engine_adapter.shared import DataObjectType from sqlmesh.core.node import IntervalUnit from sqlmesh.core.dialect import schema_ from sqlmesh.core.schema_diff import TableAlterOperation @@ -16,7 +18,12 @@ if t.TYPE_CHECKING: from sqlmesh.core._typing import 
TableName - from sqlmesh.core.engine_adapter._typing import DF + from sqlmesh.core.engine_adapter._typing import ( + DCL, + DF, + GrantsConfig, + QueryOrDF, + ) from sqlmesh.core.engine_adapter.base import QueryOrDF logger = logging.getLogger(__name__) @@ -548,3 +555,137 @@ def _normalize_decimal_value(self, expr: exp.Expression, precision: int) -> exp. def _normalize_boolean_value(self, expr: exp.Expression) -> exp.Expression: return exp.cast(expr, "INT") + + +class GrantsFromInfoSchemaMixin(EngineAdapter): + CURRENT_USER_OR_ROLE_EXPRESSION: exp.Expression = exp.func("current_user") + SUPPORTS_MULTIPLE_GRANT_PRINCIPALS = False + USE_CATALOG_IN_GRANTS = False + GRANT_INFORMATION_SCHEMA_TABLE_NAME = "table_privileges" + + @staticmethod + @abc.abstractmethod + def _grant_object_kind(table_type: DataObjectType) -> t.Optional[str]: + pass + + @abc.abstractmethod + def _get_current_schema(self) -> str: + pass + + def _dcl_grants_config_expr( + self, + dcl_cmd: t.Type[DCL], + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expression]: + expressions: t.List[exp.Expression] = [] + if not grants_config: + return expressions + + object_kind = self._grant_object_kind(table_type) + for privilege, principals in grants_config.items(): + args: t.Dict[str, t.Any] = { + "privileges": [exp.GrantPrivilege(this=exp.Var(this=privilege))], + "securable": table.copy(), + } + if object_kind: + args["kind"] = exp.Var(this=object_kind) + if self.SUPPORTS_MULTIPLE_GRANT_PRINCIPALS: + args["principals"] = [ + normalize_identifiers( + parse_one(principal, into=exp.GrantPrincipal, dialect=self.dialect), + dialect=self.dialect, + ) + for principal in principals + ] + expressions.append(dcl_cmd(**args)) # type: ignore[arg-type] + else: + for principal in principals: + args["principals"] = [ + normalize_identifiers( + parse_one(principal, into=exp.GrantPrincipal, dialect=self.dialect), + dialect=self.dialect, + ) + ] + expressions.append(dcl_cmd(**args)) # type: ignore[arg-type] + + return expressions + + def _apply_grants_config_expr( + self, + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expression]: + return self._dcl_grants_config_expr(exp.Grant, table, grants_config, table_type) + + def _revoke_grants_config_expr( + self, + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expression]: + return self._dcl_grants_config_expr(exp.Revoke, table, grants_config, table_type) + + def _get_grant_expression(self, table: exp.Table) -> exp.Expression: + schema_identifier = table.args.get("db") or normalize_identifiers( + exp.to_identifier(self._get_current_schema(), quoted=True), dialect=self.dialect + ) + schema_name = schema_identifier.this + table_name = table.args.get("this").this # type: ignore + + grant_conditions = [ + exp.column("table_schema").eq(exp.Literal.string(schema_name)), + exp.column("table_name").eq(exp.Literal.string(table_name)), + exp.column("grantor").eq(self.CURRENT_USER_OR_ROLE_EXPRESSION), + exp.column("grantee").neq(self.CURRENT_USER_OR_ROLE_EXPRESSION), + ] + + info_schema_table = normalize_identifiers( + exp.table_(self.GRANT_INFORMATION_SCHEMA_TABLE_NAME, db="information_schema"), + dialect=self.dialect, + ) + if self.USE_CATALOG_IN_GRANTS: + catalog_identifier = table.args.get("catalog") + if not catalog_identifier: + catalog_name = self.get_current_catalog() + if not catalog_name: + 
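
For engines built on this mixin, the statements produced for a given config differ only in batching. A rough sketch of the two shapes (the SQL strings are indicative; actual quoting follows the dialect's identifier normalization):

    grants = {"SELECT": ["analyst", "reporter"]}

    # SUPPORTS_MULTIPLE_GRANT_PRINCIPALS = True (e.g. Postgres, Redshift, BigQuery):
    batched = [f"GRANT {p} ON TABLE db.tbl TO {', '.join(gs)}" for p, gs in grants.items()]
    assert batched == ["GRANT SELECT ON TABLE db.tbl TO analyst, reporter"]

    # SUPPORTS_MULTIPLE_GRANT_PRINCIPALS = False (one statement per grantee):
    per_grantee = [f"GRANT {p} ON TABLE db.tbl TO {g}" for p, gs in grants.items() for g in gs]
    assert per_grantee == [
        "GRANT SELECT ON TABLE db.tbl TO analyst",
        "GRANT SELECT ON TABLE db.tbl TO reporter",
    ]
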
raise SQLMeshError( + "Current catalog could not be determined for fetching grants. This is unexpected." + ) + catalog_identifier = normalize_identifiers( + exp.to_identifier(catalog_name, quoted=True), dialect=self.dialect + ) + catalog_name = catalog_identifier.this + info_schema_table.set("catalog", catalog_identifier.copy()) + grant_conditions.insert( + 0, exp.column("table_catalog").eq(exp.Literal.string(catalog_name)) + ) + + return ( + exp.select("privilege_type", "grantee") + .from_(info_schema_table) + .where(exp.and_(*grant_conditions)) + ) + + def _get_current_grants_config(self, table: exp.Table) -> GrantsConfig: + grant_expr = self._get_grant_expression(table) + + results = self.fetchall(grant_expr) + + grants_dict: GrantsConfig = {} + for privilege_raw, grantee_raw in results: + if privilege_raw is None or grantee_raw is None: + continue + + privilege = str(privilege_raw) + grantee = str(grantee_raw) + if not privilege or not grantee: + continue + + grantees = grants_dict.setdefault(privilege, []) + if grantee not in grantees: + grantees.append(grantee) + + return grants_dict diff --git a/sqlmesh/core/engine_adapter/postgres.py b/sqlmesh/core/engine_adapter/postgres.py index 79431ee360..3dd108cf91 100644 --- a/sqlmesh/core/engine_adapter/postgres.py +++ b/sqlmesh/core/engine_adapter/postgres.py @@ -12,6 +12,7 @@ PandasNativeFetchDFSupportMixin, RowDiffMixin, logical_merge, + GrantsFromInfoSchemaMixin, ) from sqlmesh.core.engine_adapter.shared import set_catalog @@ -28,14 +29,19 @@ class PostgresEngineAdapter( PandasNativeFetchDFSupportMixin, GetCurrentCatalogFromFunctionMixin, RowDiffMixin, + GrantsFromInfoSchemaMixin, ): DIALECT = "postgres" + SUPPORTS_GRANTS = True SUPPORTS_INDEXES = True HAS_VIEW_BINDING = True CURRENT_CATALOG_EXPRESSION = exp.column("current_catalog") SUPPORTS_REPLACE_TABLE = False MAX_IDENTIFIER_LENGTH: t.Optional[int] = 63 SUPPORTS_QUERY_EXECUTION_TRACKING = True + GRANT_INFORMATION_SCHEMA_TABLE_NAME = "role_table_grants" + CURRENT_USER_OR_ROLE_EXPRESSION: exp.Expression = exp.column("current_role") + SUPPORTS_MULTIPLE_GRANT_PRINCIPALS = True SCHEMA_DIFFER_KWARGS = { "parameterized_type_defaults": { # DECIMAL without precision is "up to 131072 digits before the decimal point; up to 16383 digits after the decimal point" diff --git a/sqlmesh/core/engine_adapter/redshift.py b/sqlmesh/core/engine_adapter/redshift.py index 7979268473..03dc89053e 100644 --- a/sqlmesh/core/engine_adapter/redshift.py +++ b/sqlmesh/core/engine_adapter/redshift.py @@ -14,6 +14,7 @@ VarcharSizeWorkaroundMixin, RowDiffMixin, logical_merge, + GrantsFromInfoSchemaMixin, ) from sqlmesh.core.engine_adapter.shared import ( CommentCreationView, @@ -40,12 +41,15 @@ class RedshiftEngineAdapter( NonTransactionalTruncateMixin, VarcharSizeWorkaroundMixin, RowDiffMixin, + GrantsFromInfoSchemaMixin, ): DIALECT = "redshift" CURRENT_CATALOG_EXPRESSION = exp.func("current_database") # Redshift doesn't support comments for VIEWs WITH NO SCHEMA BINDING (which we always use) COMMENT_CREATION_VIEW = CommentCreationView.UNSUPPORTED SUPPORTS_REPLACE_TABLE = False + SUPPORTS_GRANTS = True + SUPPORTS_MULTIPLE_GRANT_PRINCIPALS = True SCHEMA_DIFFER_KWARGS = { "parameterized_type_defaults": { diff --git a/sqlmesh/core/engine_adapter/risingwave.py b/sqlmesh/core/engine_adapter/risingwave.py index fdcee90f0f..61b44f5bbb 100644 --- a/sqlmesh/core/engine_adapter/risingwave.py +++ b/sqlmesh/core/engine_adapter/risingwave.py @@ -32,6 +32,7 @@ class RisingwaveEngineAdapter(PostgresEngineAdapter): 
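
The row-to-config conversion done by `_get_current_grants_config` above boils down to a de-duplicating group-by over the information-schema rows; for example:

    rows = [("SELECT", "analyst"), ("SELECT", "analyst"), ("INSERT", "etl"), (None, "ghost")]

    grants: dict[str, list[str]] = {}
    for privilege, grantee in rows:
        if not privilege or not grantee:  # skip NULL/empty rows defensively
            continue
        grantees = grants.setdefault(str(privilege), [])
        if str(grantee) not in grantees:  # keep first occurrence only
            grantees.append(str(grantee))

    assert grants == {"SELECT": ["analyst"], "INSERT": ["etl"]}
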
     SUPPORTS_MATERIALIZED_VIEWS = True
     SUPPORTS_TRANSACTIONS = False
     MAX_IDENTIFIER_LENGTH = None
+    SUPPORTS_GRANTS = False

     def columns(
         self, table_name: TableName, include_pseudo_columns: bool = False
diff --git a/sqlmesh/core/engine_adapter/snowflake.py b/sqlmesh/core/engine_adapter/snowflake.py
index 1554589779..a8eabe070d 100644
--- a/sqlmesh/core/engine_adapter/snowflake.py
+++ b/sqlmesh/core/engine_adapter/snowflake.py
@@ -15,6 +15,7 @@
     GetCurrentCatalogFromFunctionMixin,
     ClusteredByMixin,
     RowDiffMixin,
+    GrantsFromInfoSchemaMixin,
 )
 from sqlmesh.core.engine_adapter.shared import (
     CatalogSupport,
@@ -34,7 +35,12 @@
     import pandas as pd

     from sqlmesh.core._typing import SchemaName, SessionProperties, TableName
-    from sqlmesh.core.engine_adapter._typing import DF, Query, QueryOrDF, SnowparkSession
+    from sqlmesh.core.engine_adapter._typing import (
+        DF,
+        Query,
+        QueryOrDF,
+        SnowparkSession,
+    )

     from sqlmesh.core.node import IntervalUnit
@@ -46,7 +52,9 @@
         "drop_catalog": CatalogSupport.REQUIRES_SET_CATALOG,  # needs a catalog to issue a query to information_schema.databases even though the result is global
     }
 )
-class SnowflakeEngineAdapter(GetCurrentCatalogFromFunctionMixin, ClusteredByMixin, RowDiffMixin):
+class SnowflakeEngineAdapter(
+    GetCurrentCatalogFromFunctionMixin, ClusteredByMixin, RowDiffMixin, GrantsFromInfoSchemaMixin
+):
     DIALECT = "snowflake"
     SUPPORTS_MATERIALIZED_VIEWS = True
     SUPPORTS_MATERIALIZED_VIEW_SCHEMA = True
@@ -74,6 +82,9 @@ class SnowflakeEngineAdapter(GetCurrentCatalogFromFunctionMixi
     MANAGED_TABLE_KIND = "DYNAMIC TABLE"
     SNOWPARK = "snowpark"
     SUPPORTS_QUERY_EXECUTION_TRACKING = True
+    SUPPORTS_GRANTS = True
+    CURRENT_USER_OR_ROLE_EXPRESSION: exp.Expression = exp.func("CURRENT_ROLE")
+    USE_CATALOG_IN_GRANTS = True

     @contextlib.contextmanager
     def session(self, properties: SessionProperties) -> t.Iterator[None]:
@@ -128,6 +139,23 @@ def snowpark(self) -> t.Optional[SnowparkSession]:
     def catalog_support(self) -> CatalogSupport:
         return CatalogSupport.FULL_SUPPORT

+    @staticmethod
+    def _grant_object_kind(table_type: DataObjectType) -> str:
+        if table_type == DataObjectType.VIEW:
+            return "VIEW"
+        if table_type == DataObjectType.MATERIALIZED_VIEW:
+            return "MATERIALIZED VIEW"
+        if table_type == DataObjectType.MANAGED_TABLE:
+            return "DYNAMIC TABLE"
+        return "TABLE"
+
+    def _get_current_schema(self) -> str:
+        """Returns the current default schema for the connection."""
+        result = self.fetchone("SELECT CURRENT_SCHEMA()")
+        if not result or not result[0]:
+            raise SQLMeshError("Unable to determine current schema")
+        return str(result[0])
+
     def _create_catalog(self, catalog_name: exp.Identifier) -> None:
         props = exp.Properties(
             expressions=[exp.SchemaCommentProperty(this=exp.Literal.string(c.SQLMESH_MANAGED))]
@@ -533,13 +561,32 @@ def _get_data_objects(
             for row in df.rename(columns={col: col.lower() for col in df.columns}).itertuples()
         ]

+    def _get_grant_expression(self, table: exp.Table) -> exp.Expression:
+        # Upon execution, the catalog in table expressions is properly normalized to handle the case where a user
+        # provides the default catalog in their connection config. However, this doesn't update catalogs embedded in
+        # string literals, such as when querying the information schema. So we need to manually replace those here.
+ expression = super()._get_grant_expression(table) + for col_exp in expression.find_all(exp.Column): + if col_exp.this.name == "table_catalog": + and_exp = col_exp.parent + assert and_exp is not None, "Expected column expression to have a parent" + assert and_exp.expression, "Expected AND expression to have an expression" + normalized_catalog = self._normalize_catalog( + exp.table_("placeholder", db="placeholder", catalog=and_exp.expression.this) + ) + and_exp.set( + "expression", + exp.Literal.string(normalized_catalog.args["catalog"].alias_or_name), + ) + return expression + def set_current_catalog(self, catalog: str) -> None: self.execute(exp.Use(this=exp.to_identifier(catalog))) def set_current_schema(self, schema: str) -> None: self.execute(exp.Use(kind="SCHEMA", this=to_schema(schema))) - def _to_sql(self, expression: exp.Expression, quote: bool = True, **kwargs: t.Any) -> str: + def _normalize_catalog(self, expression: exp.Expression) -> exp.Expression: # note: important to use self._default_catalog instead of the self.default_catalog property # otherwise we get RecursionError: maximum recursion depth exceeded # because it calls get_current_catalog(), which executes a query, which needs the default catalog, which calls get_current_catalog()... etc @@ -572,8 +619,12 @@ def catalog_rewriter(node: exp.Expression) -> exp.Expression: # Snowflake connection config. This is because the catalog present on the model gets normalized and quoted to match # the source dialect, which isnt always compatible with Snowflake expression = expression.transform(catalog_rewriter) + return expression - return super()._to_sql(expression=expression, quote=quote, **kwargs) + def _to_sql(self, expression: exp.Expression, quote: bool = True, **kwargs: t.Any) -> str: + return super()._to_sql( + expression=self._normalize_catalog(expression), quote=quote, **kwargs + ) def _create_column_comments( self, diff --git a/sqlmesh/core/engine_adapter/spark.py b/sqlmesh/core/engine_adapter/spark.py index b2d6a9cbb5..5216b0a329 100644 --- a/sqlmesh/core/engine_adapter/spark.py +++ b/sqlmesh/core/engine_adapter/spark.py @@ -397,7 +397,7 @@ def get_current_catalog(self) -> t.Optional[str]: def set_current_catalog(self, catalog_name: str) -> None: self.connection.set_current_catalog(catalog_name) - def get_current_database(self) -> str: + def _get_current_schema(self) -> str: if self._use_spark_session: return self.spark.catalog.currentDatabase() return self.fetchone(exp.select(exp.func("current_database")))[0] # type: ignore @@ -539,7 +539,7 @@ def _ensure_fqn(self, table_name: TableName) -> exp.Table: if not table.catalog: table.set("catalog", self.get_current_catalog()) if not table.db: - table.set("db", self.get_current_database()) + table.set("db", self._get_current_schema()) return table def _build_create_comment_column_exp( diff --git a/sqlmesh/core/model/common.py b/sqlmesh/core/model/common.py index 0a55f80cee..d2b9a11c08 100644 --- a/sqlmesh/core/model/common.py +++ b/sqlmesh/core/model/common.py @@ -641,6 +641,7 @@ def parse_strings_with_macro_refs(value: t.Any, dialect: DialectType) -> t.Any: "physical_properties_", "virtual_properties_", "materialization_properties_", + "grants_", mode="before", check_fields=False, )(parse_properties) diff --git a/sqlmesh/core/model/definition.py b/sqlmesh/core/model/definition.py index 974901cb55..f81dae004b 100644 --- a/sqlmesh/core/model/definition.py +++ b/sqlmesh/core/model/definition.py @@ -67,6 +67,7 @@ from sqlmesh.core.context import ExecutionContext from 
sqlmesh.core.engine_adapter import EngineAdapter from sqlmesh.core.engine_adapter._typing import QueryOrDF + from sqlmesh.core.engine_adapter.shared import DataObjectType from sqlmesh.core.linter.rule import Rule from sqlmesh.core.snapshot import DeployabilityIndex, Node, Snapshot from sqlmesh.utils.jinja import MacroReference @@ -1186,6 +1187,8 @@ def metadata_hash(self) -> str: gen(self.session_properties_) if self.session_properties_ else None, *[gen(g) for g in self.grains], *self._audit_metadata_hash_values(), + json.dumps(self.grants, sort_keys=True) if self.grants else None, + self.grants_target_layer, ] for key, value in (self.virtual_properties or {}).items(): @@ -1210,6 +1213,24 @@ def is_model(self) -> bool: """Return True if this is a model node""" return True + @property + def grants_table_type(self) -> DataObjectType: + """Get the table type for grants application (TABLE, VIEW, MATERIALIZED_VIEW). + + Returns: + The DataObjectType that should be used when applying grants to this model. + """ + from sqlmesh.core.engine_adapter.shared import DataObjectType + + if self.kind.is_view: + if hasattr(self.kind, "materialized") and getattr(self.kind, "materialized", False): + return DataObjectType.MATERIALIZED_VIEW + return DataObjectType.VIEW + if self.kind.is_managed: + return DataObjectType.MANAGED_TABLE + # All other materialized models are tables + return DataObjectType.TABLE + @property def _additional_metadata(self) -> t.List[str]: additional_metadata = [] @@ -1823,6 +1844,12 @@ def _data_hash_values_no_sql(self) -> t.List[str]: for column_name, column_hash in self.column_hashes.items(): data.append(column_name) data.append(column_hash) + + # Include grants in data hash for seed models to force recreation on grant changes + # since seed models don't support migration + data.append(json.dumps(self.grants, sort_keys=True) if self.grants else "") + data.append(self.grants_target_layer) + return data @@ -3023,6 +3050,8 @@ def render_expression( "optimize_query": str, "virtual_environment_mode": lambda value: exp.Literal.string(value.value), "dbt_node_info_": lambda value: value.to_expression(), + "grants_": lambda value: value, + "grants_target_layer": lambda value: exp.Literal.string(value.value), } diff --git a/sqlmesh/core/model/kind.py b/sqlmesh/core/model/kind.py index 7b8e88ac17..cc4c6f0826 100644 --- a/sqlmesh/core/model/kind.py +++ b/sqlmesh/core/model/kind.py @@ -154,6 +154,11 @@ def full_history_restatement_only(self) -> bool: def supports_python_models(self) -> bool: return True + @property + def supports_grants(self) -> bool: + """Whether this model kind supports grants configuration.""" + return self.is_materialized or self.is_view + class ModelKindName(str, ModelKindMixin, Enum): """The kind of model, determining how this data is computed and stored in the warehouse.""" diff --git a/sqlmesh/core/model/meta.py b/sqlmesh/core/model/meta.py index 9208fbdbb5..c48b7d1524 100644 --- a/sqlmesh/core/model/meta.py +++ b/sqlmesh/core/model/meta.py @@ -1,6 +1,7 @@ from __future__ import annotations import typing as t +from enum import Enum from functools import cached_property from typing_extensions import Self @@ -13,6 +14,7 @@ from sqlmesh.core.config.common import VirtualEnvironmentMode from sqlmesh.core.config.linter import LinterConfig from sqlmesh.core.dialect import normalize_model_name +from sqlmesh.utils import classproperty from sqlmesh.core.model.common import ( bool_validator, default_catalog_validator, @@ -46,10 +48,41 @@ if t.TYPE_CHECKING: from 
sqlmesh.core._typing import CustomMaterializationProperties, SessionProperties + from sqlmesh.core.engine_adapter._typing import GrantsConfig FunctionCall = t.Tuple[str, t.Dict[str, exp.Expression]] +class GrantsTargetLayer(str, Enum): + """Target layer(s) where grants should be applied.""" + + ALL = "all" + PHYSICAL = "physical" + VIRTUAL = "virtual" + + @classproperty + def default(cls) -> "GrantsTargetLayer": + return GrantsTargetLayer.VIRTUAL + + @property + def is_all(self) -> bool: + return self == GrantsTargetLayer.ALL + + @property + def is_physical(self) -> bool: + return self == GrantsTargetLayer.PHYSICAL + + @property + def is_virtual(self) -> bool: + return self == GrantsTargetLayer.VIRTUAL + + def __str__(self) -> str: + return self.name + + def __repr__(self) -> str: + return str(self) + + class ModelMeta(_Node): """Metadata for models which can be defined in SQL.""" @@ -85,6 +118,8 @@ class ModelMeta(_Node): ) formatting: t.Optional[bool] = Field(default=None, exclude=True) virtual_environment_mode: VirtualEnvironmentMode = VirtualEnvironmentMode.default + grants_: t.Optional[exp.Tuple] = Field(default=None, alias="grants") + grants_target_layer: GrantsTargetLayer = GrantsTargetLayer.default _bool_validator = bool_validator _model_kind_validator = model_kind_validator @@ -287,6 +322,14 @@ def _refs_validator(cls, vs: t.Any, info: ValidationInfo) -> t.List[exp.Expressi def ignored_rules_validator(cls, vs: t.Any) -> t.Any: return LinterConfig._validate_rules(vs) + @field_validator("grants_target_layer", mode="before") + def _grants_target_layer_validator(cls, v: t.Any) -> t.Any: + if isinstance(v, exp.Identifier): + return v.this + if isinstance(v, exp.Literal) and v.is_string: + return v.this + return v + @field_validator("session_properties_", mode="before") def session_properties_validator(cls, v: t.Any, info: ValidationInfo) -> t.Any: # use the generic properties validator to parse the session properties @@ -394,6 +437,10 @@ def _root_validator(self) -> Self: f"Model {self.name} has `storage_format` set to a table format '{storage_format}' which is deprecated. Please use the `table_format` property instead." 
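
The new enum in action (a sketch that assumes a SQLMesh build containing this patch; note that grants default to the virtual layer, i.e. the environment views):

    from sqlmesh.core.model.meta import GrantsTargetLayer

    layer = GrantsTargetLayer("physical")  # str-valued enum, so raw model text works
    assert layer.is_physical and not layer.is_all
    assert GrantsTargetLayer.default is GrantsTargetLayer.VIRTUAL
    assert str(GrantsTargetLayer.ALL) == "ALL"  # __str__ returns the member name
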
) + # Validate grants configuration for model kind support + if self.grants is not None and not kind.supports_grants: + raise ValueError(f"grants cannot be set for {kind.name} models") + return self @property @@ -465,6 +512,30 @@ def custom_materialization_properties(self) -> CustomMaterializationProperties: return self.kind.materialization_properties return {} + @cached_property + def grants(self) -> t.Optional[GrantsConfig]: + """A dictionary of grants mapping permission names to lists of grantees.""" + + if self.grants_ is None: + return None + + if not self.grants_.expressions: + return {} + + grants_dict = {} + for eq_expr in self.grants_.expressions: + try: + permission_name = self._validate_config_expression(eq_expr.left) + grantee_list = self._validate_nested_config_values(eq_expr.expression) + grants_dict[permission_name] = grantee_list + except ConfigError as e: + permission_name = ( + eq_expr.left.name if hasattr(eq_expr.left, "name") else str(eq_expr.left) + ) + raise ConfigError(f"Invalid grants configuration for '{permission_name}': {e}") + + return grants_dict if grants_dict else None + @property def all_references(self) -> t.List[Reference]: """All references including grains.""" @@ -529,3 +600,33 @@ def on_additive_change(self) -> OnAdditiveChange: @property def ignored_rules(self) -> t.Set[str]: return self.ignored_rules_ or set() + + def _validate_config_expression(self, expr: exp.Expression) -> str: + if isinstance(expr, (d.MacroFunc, d.MacroVar)): + raise ConfigError(f"Unresolved macro: {expr.sql(dialect=self.dialect)}") + + if isinstance(expr, exp.Null): + raise ConfigError("NULL value") + + if isinstance(expr, exp.Literal): + return str(expr.this).strip() + if isinstance(expr, (exp.Column, exp.Identifier)): + return expr.name + return expr.sql(dialect=self.dialect).strip() + + def _validate_nested_config_values(self, value_expr: exp.Expression) -> t.List[str]: + result = [] + + def flatten_expr(expr: exp.Expression) -> None: + if isinstance(expr, exp.Array): + for elem in expr.expressions: + flatten_expr(elem) + elif isinstance(expr, (exp.Tuple, exp.Paren)): + expressions = [expr.unnest()] if isinstance(expr, exp.Paren) else expr.expressions + for elem in expressions: + flatten_expr(elem) + else: + result.append(self._validate_config_expression(expr)) + + flatten_expr(value_expr) + return result diff --git a/sqlmesh/core/snapshot/evaluator.py b/sqlmesh/core/snapshot/evaluator.py index 1483bdeece..2676709d85 100644 --- a/sqlmesh/core/snapshot/evaluator.py +++ b/sqlmesh/core/snapshot/evaluator.py @@ -39,6 +39,7 @@ from sqlmesh.core.audit import Audit, StandaloneAudit from sqlmesh.core.dialect import schema_ from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, DataObjectType, DataObject +from sqlmesh.core.model.meta import GrantsTargetLayer from sqlmesh.core.macros import RuntimeStage from sqlmesh.core.model import ( AuditResult, @@ -932,6 +933,7 @@ def _render_and_insert_snapshot( model = snapshot.model adapter = self.get_adapter(model.gateway) evaluation_strategy = _evaluation_strategy(snapshot, adapter) + is_snapshot_deployable = deployability_index.is_deployable(snapshot) queries_or_dfs = self._render_snapshot_for_evaluation( snapshot, @@ -955,6 +957,7 @@ def apply(query_or_df: QueryOrDF, index: int = 0) -> None: execution_time=execution_time, physical_properties=rendered_physical_properties, render_kwargs=create_render_kwargs, + is_snapshot_deployable=is_snapshot_deployable, ) else: logger.info( @@ -977,6 +980,7 @@ def apply(query_or_df: 
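
End to end, the `grants` property and its validators above turn the model DDL into a plain dict. A sketch (model name and grantees are illustrative; it assumes a SQLMesh build containing this patch and that `grants` uses the same tuple syntax as `physical_properties`):

    from sqlmesh.core.dialect import parse
    from sqlmesh.core.model import load_sql_based_model

    model = load_sql_based_model(
        parse(
            """
            MODEL (
                name demo.orders,
                kind FULL,
                grants (
                    'SELECT' = ('analyst', 'reporter'),
                    'INSERT' = 'etl_role'
                ),
                grants_target_layer 'all'
            );
            SELECT 1 AS id
            """
        )
    )
    assert model.grants == {"SELECT": ["analyst", "reporter"], "INSERT": ["etl_role"]}
    assert model.grants_target_layer.is_all
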
QueryOrDF, index: int = 0) -> None: execution_time=execution_time, physical_properties=rendered_physical_properties, render_kwargs=create_render_kwargs, + is_snapshot_deployable=is_snapshot_deployable, ) # DataFrames, unlike SQL expressions, can provide partial results by yielding dataframes. As a result, @@ -1066,6 +1070,7 @@ def _clone_snapshot_in_dev( allow_additive_snapshots=allow_additive_snapshots, run_pre_post_statements=run_pre_post_statements, ) + except Exception: adapter.drop_table(target_table_name) raise @@ -1166,6 +1171,7 @@ def _migrate_target_table( rendered_physical_properties=rendered_physical_properties, dry_run=False, run_pre_post_statements=run_pre_post_statements, + skip_grants=True, # skip grants for tmp table ) try: evaluation_strategy = _evaluation_strategy(snapshot, adapter) @@ -1183,6 +1189,7 @@ def _migrate_target_table( allow_additive_snapshots=allow_additive_snapshots, ignore_destructive=snapshot.model.on_destructive_change.is_ignore, ignore_additive=snapshot.model.on_additive_change.is_ignore, + deployability_index=deployability_index, ) finally: if snapshot.is_materialized: @@ -1232,6 +1239,7 @@ def _promote_snapshot( model=snapshot.model, environment=environment_naming_info.name, snapshots=snapshots, + snapshot=snapshot, **render_kwargs, ) @@ -1431,6 +1439,7 @@ def _execute_create( rendered_physical_properties: t.Dict[str, exp.Expression], dry_run: bool, run_pre_post_statements: bool = True, + skip_grants: bool = False, ) -> None: adapter = self.get_adapter(snapshot.model.gateway) evaluation_strategy = _evaluation_strategy(snapshot, adapter) @@ -1451,11 +1460,14 @@ def _execute_create( table_name=table_name, model=snapshot.model, is_table_deployable=is_table_deployable, + skip_grants=skip_grants, render_kwargs=create_render_kwargs, is_snapshot_deployable=is_snapshot_deployable, is_snapshot_representative=is_snapshot_representative, dry_run=dry_run, physical_properties=rendered_physical_properties, + snapshot=snapshot, + deployability_index=deployability_index, ) if run_pre_post_statements: evaluation_strategy.run_post_statements( @@ -1469,7 +1481,7 @@ def _can_clone(self, snapshot: Snapshot, deployability_index: DeployabilityIndex and snapshot.is_materialized and bool(snapshot.previous_versions) and adapter.SUPPORTS_CLONING - # managed models cannot have their schema mutated because theyre based on queries, so clone + alter wont work + # managed models cannot have their schema mutated because they're based on queries, so clone + alter won't work and not snapshot.is_managed and not snapshot.is_dbt_custom and not deployability_index.is_deployable(snapshot) @@ -1690,6 +1702,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: """Creates the target table or view. @@ -1780,6 +1793,66 @@ def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: render_kwargs: Additional key-value arguments to pass when rendering the statements. """ + def _apply_grants( + self, + model: Model, + table_name: str, + target_layer: GrantsTargetLayer, + is_snapshot_deployable: bool = False, + ) -> None: + """Apply grants for a model if grants are configured. + + This method provides consistent grants application across all evaluation strategies. + It ensures that whenever a physical database object (table, view, materialized view) + is created or modified, the appropriate grants are applied. 
+ + Args: + model: The SQLMesh model containing grants configuration + table_name: The target table/view name to apply grants to + target_layer: The grants application layer (physical or virtual) + is_snapshot_deployable: Whether the snapshot is deployable (targeting production) + """ + grants_config = model.grants + if grants_config is None: + return + + if not self.adapter.SUPPORTS_GRANTS: + logger.warning( + f"Engine {self.adapter.__class__.__name__} does not support grants. " + f"Skipping grants application for model {model.name}" + ) + return + + model_grants_target_layer = model.grants_target_layer + deployable_vde_dev_only = ( + is_snapshot_deployable and model.virtual_environment_mode.is_dev_only + ) + + # table_type is always a VIEW in the virtual layer unless model is deployable and VDE is dev_only + # in which case we fall back to the model's model_grants_table_type + if target_layer == GrantsTargetLayer.VIRTUAL and not deployable_vde_dev_only: + model_grants_table_type = DataObjectType.VIEW + else: + model_grants_table_type = model.grants_table_type + + if ( + model_grants_target_layer.is_all + or model_grants_target_layer == target_layer + # Always apply grants in production when VDE is dev_only regardless of target_layer + # since only physical tables are created in production + or deployable_vde_dev_only + ): + logger.info(f"Applying grants for model {model.name} to table {table_name}") + self.adapter.sync_grants_config( + exp.to_table(table_name, dialect=self.adapter.dialect), + grants_config, + model_grants_table_type, + ) + else: + logger.debug( + f"Skipping grants application for model {model.name} in {target_layer} layer" + ) + class SymbolicStrategy(EvaluationStrategy): def insert( @@ -1809,6 +1882,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: pass @@ -1890,6 +1964,17 @@ def promote( view_properties=model.render_virtual_properties(**render_kwargs), ) + snapshot = kwargs.get("snapshot") + deployability_index = kwargs.get("deployability_index") + is_snapshot_deployable = ( + deployability_index.is_deployable(snapshot) + if snapshot and deployability_index + else False + ) + + # Apply grants to the virtual layer (view) after promotion + self._apply_grants(model, view_name, GrantsTargetLayer.VIRTUAL, is_snapshot_deployable) + def demote(self, view_name: str, **kwargs: t.Any) -> None: logger.info("Dropping view '%s'", view_name) self.adapter.drop_view(view_name, cascade=False) @@ -1908,6 +1993,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: ctas_query = model.ctas_query(**render_kwargs) @@ -1952,6 +2038,13 @@ def create( column_descriptions=model.column_descriptions if is_table_deployable else None, ) + # Apply grants after table creation (unless explicitly skipped by caller) + if not skip_grants: + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def migrate( self, target_table_name: str, @@ -1977,6 +2070,15 @@ def migrate( ) self.adapter.alter_table(alter_operations) + # Apply grants after schema migration + deployability_index = kwargs.get("deployability_index") + is_snapshot_deployable = ( + deployability_index.is_deployable(snapshot) if deployability_index else False + ) + self._apply_grants( + snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, 
is_snapshot_deployable + ) + def delete(self, name: str, **kwargs: t.Any) -> None: _check_table_db_is_physical_schema(name, kwargs["physical_schema"]) self.adapter.drop_table(name, cascade=kwargs.pop("cascade", False)) @@ -1988,6 +2090,7 @@ def _replace_query_for_model( name: str, query_or_df: QueryOrDF, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool = False, **kwargs: t.Any, ) -> None: """Replaces the table for the given model. @@ -2024,6 +2127,11 @@ def _replace_query_for_model( source_columns=source_columns, ) + # Apply grants after table replacement (unless explicitly skipped by caller) + if not skip_grants: + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants(model, name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) + def _get_target_and_source_columns( self, model: Model, @@ -2271,6 +2379,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: model = t.cast(SeedModel, model) @@ -2284,16 +2393,37 @@ def create( ) return - super().create(table_name, model, is_table_deployable, render_kwargs, **kwargs) + super().create( + table_name, + model, + is_table_deployable, + render_kwargs, + skip_grants=True, # Skip grants; they're applied after data insertion + **kwargs, + ) # For seeds we insert data at the time of table creation. try: for index, df in enumerate(model.render_seed()): if index == 0: - self._replace_query_for_model(model, table_name, df, render_kwargs, **kwargs) + self._replace_query_for_model( + model, + table_name, + df, + render_kwargs, + skip_grants=True, # Skip grants; they're applied after data insertion + **kwargs, + ) else: self.adapter.insert_append( table_name, df, target_columns_to_types=model.columns_to_types ) + + if not skip_grants: + # Apply grants after seed table creation and data insertion + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) except Exception: self.adapter.drop_table(table_name) raise @@ -2341,6 +2471,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: assert isinstance(model.kind, (SCDType2ByTimeKind, SCDType2ByColumnKind)) @@ -2370,9 +2501,17 @@ def create( model, is_table_deployable, render_kwargs, + skip_grants, **kwargs, ) + if not skip_grants: + # Apply grants after SCD Type 2 table creation + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def insert( self, table_name: str, @@ -2440,6 +2579,10 @@ def insert( f"Unexpected SCD Type 2 kind: {model.kind}. This is not expected and please report this as a bug." 
) + # Apply grants after SCD Type 2 table recreation + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants(model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) + def append( self, table_name: str, @@ -2496,6 +2639,10 @@ def insert( column_descriptions=model.column_descriptions, ) + # Apply grants after view creation / replacement + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants(model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) + def append( self, table_name: str, @@ -2512,12 +2659,21 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + if self.adapter.table_exists(table_name): # Make sure we don't recreate the view to prevent deletion of downstream views in engines with no late # binding support (because of DROP CASCADE). logger.info("View '%s' already exists", table_name) + + if not skip_grants: + # Always apply grants when present, even if view exists, to handle grants updates + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) return logger.info("Creating view '%s'", table_name) @@ -2541,6 +2697,12 @@ def create( column_descriptions=model.column_descriptions if is_table_deployable else None, ) + if not skip_grants: + # Apply grants after view creation + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def migrate( self, target_table_name: str, @@ -2567,6 +2729,15 @@ def migrate( column_descriptions=model.column_descriptions, ) + # Apply grants after view migration + deployability_index = kwargs.get("deployability_index") + is_snapshot_deployable = ( + deployability_index.is_deployable(snapshot) if deployability_index else False + ) + self._apply_grants( + snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def delete(self, name: str, **kwargs: t.Any) -> None: cascade = kwargs.pop("cascade", False) try: @@ -2723,6 +2894,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: original_query = model.render_query_or_raise(**render_kwargs) @@ -2852,6 +3024,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: is_snapshot_deployable: bool = kwargs["is_snapshot_deployable"] @@ -2870,6 +3043,13 @@ def create( column_descriptions=model.column_descriptions, table_format=model.table_format, ) + + # Apply grants after managed table creation + if not skip_grants: + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + elif not is_table_deployable: # Only create the dev preview table as a normal table. # For the main table, if the snapshot is cant be deployed to prod (eg upstream is forward-only) do nothing. 
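
Every strategy above routes through the same gate inside `_apply_grants`. Reduced to a pure function for illustration (a simplification of the actual checks; `dev_only` stands for a deployable snapshot whose model uses virtual_environment_mode dev_only):

    def should_apply(model_layer: str, call_layer: str, dev_only: bool) -> bool:
        return model_layer == "all" or model_layer == call_layer or dev_only

    assert should_apply("virtual", "virtual", False)        # default: grants on environment views
    assert not should_apply("virtual", "physical", False)   # physical create is skipped
    assert should_apply("all", "physical", False)           # "all" applies at both layers
    assert should_apply("virtual", "physical", True)        # prod + dev_only VDE: no views exist
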
@@ -2880,6 +3060,7 @@ def create( model=model, is_table_deployable=is_table_deployable, render_kwargs=render_kwargs, + skip_grants=skip_grants, **kwargs, ) @@ -2895,7 +3076,6 @@ def insert( deployability_index: DeployabilityIndex = kwargs["deployability_index"] snapshot: Snapshot = kwargs["snapshot"] is_snapshot_deployable = deployability_index.is_deployable(snapshot) - if is_first_insert and is_snapshot_deployable and not self.adapter.table_exists(table_name): self.adapter.create_managed_table( table_name=table_name, @@ -2908,6 +3088,9 @@ def insert( column_descriptions=model.column_descriptions, table_format=model.table_format, ) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) elif not is_snapshot_deployable: # Snapshot isnt deployable; update the preview table instead # If the snapshot was deployable, then data would have already been loaded in create() because a managed table would have been created @@ -2956,6 +3139,15 @@ def migrate( f"The schema of the managed model '{target_table_name}' cannot be updated in a forward-only fashion." ) + # Apply grants after verifying no schema changes + deployability_index = kwargs.get("deployability_index") + is_snapshot_deployable = ( + deployability_index.is_deployable(snapshot) if deployability_index else False + ) + self._apply_grants( + snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def delete(self, name: str, **kwargs: t.Any) -> None: # a dev preview table is created as a normal table, so it needs to be dropped as a normal table _check_table_db_is_physical_schema(name, kwargs["physical_schema"]) diff --git a/sqlmesh/dbt/basemodel.py b/sqlmesh/dbt/basemodel.py index 0b75955129..3e325f13e6 100644 --- a/sqlmesh/dbt/basemodel.py +++ b/sqlmesh/dbt/basemodel.py @@ -165,7 +165,11 @@ def _validate_hooks(cls, v: t.Union[str, t.List[t.Union[SqlStr, str]]]) -> t.Lis @field_validator("grants", mode="before") @classmethod - def _validate_grants(cls, v: t.Dict[str, str]) -> t.Dict[str, t.List[str]]: + def _validate_grants( + cls, v: t.Optional[t.Dict[str, str]] + ) -> t.Optional[t.Dict[str, t.List[str]]]: + if v is None: + return None return {key: ensure_list(value) for key, value in v.items()} _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = { diff --git a/sqlmesh/dbt/model.py b/sqlmesh/dbt/model.py index f47283d06e..f21eefe95d 100644 --- a/sqlmesh/dbt/model.py +++ b/sqlmesh/dbt/model.py @@ -679,6 +679,12 @@ def to_sqlmesh( if physical_properties: model_kwargs["physical_properties"] = physical_properties + kind = self.model_kind(context) + + # A falsy grants config (None or {}) is considered as unmanaged per dbt semantics + if self.grants and kind.supports_grants: + model_kwargs["grants"] = self.grants + allow_partials = model_kwargs.pop("allow_partials", None) if allow_partials is None: # Set allow_partials to True for dbt models to preserve the original semantics. 
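
On the dbt side, the changes above preserve dbt's semantics: scalar grantees are coerced to lists by `_validate_grants`, and a falsy grants config ({} or None) simply isn't passed through, leaving grants unmanaged. Roughly:

    def ensure_list(value):  # stand-in for the list-coercion helper used by _validate_grants
        return value if isinstance(value, list) else [value]

    raw = {"select": "reporter"}  # dbt allows a bare string per privilege
    validated = {key: ensure_list(value) for key, value in raw.items()}
    assert validated == {"select": ["reporter"]}

    model_kwargs: dict = {}
    if validated:  # i.e. `self.grants and kind.supports_grants` in to_sqlmesh()
        model_kwargs["grants"] = validated
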
diff --git a/sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py b/sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py new file mode 100644 index 0000000000..fa23935da0 --- /dev/null +++ b/sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py @@ -0,0 +1,9 @@ +"""Add grants and grants_target_layer to incremental model metadata hash.""" + + +def migrate_schemas(state_sync, **kwargs): # type: ignore + pass + + +def migrate_rows(state_sync, **kwargs): # type: ignore + pass diff --git a/tests/core/engine_adapter/integration/__init__.py b/tests/core/engine_adapter/integration/__init__.py index c5377e309a..49624154e4 100644 --- a/tests/core/engine_adapter/integration/__init__.py +++ b/tests/core/engine_adapter/integration/__init__.py @@ -5,10 +5,12 @@ import sys import typing as t import time +from contextlib import contextmanager import pandas as pd # noqa: TID253 import pytest from sqlglot import exp, parse_one +from sqlglot.optimizer.normalize_identifiers import normalize_identifiers from sqlmesh import Config, Context, EngineAdapter from sqlmesh.core.config import load_config_from_paths @@ -744,6 +746,106 @@ def upsert_sql_model(self, model_definition: str) -> t.Tuple[Context, SqlModel]: self._context.upsert_model(model) return self._context, model + def _get_create_user_or_role( + self, username: str, password: t.Optional[str] = None + ) -> t.Tuple[str, t.Optional[str]]: + password = password or random_id() + if self.dialect in ["postgres", "redshift"]: + return username, f"CREATE USER \"{username}\" WITH PASSWORD '{password}'" + if self.dialect == "snowflake": + return username, f"CREATE ROLE {username}" + if self.dialect == "databricks": + # Creating an account-level group in Databricks requires making REST API calls so we are going to + # use a pre-created group instead. 
We assume the suffix on the name is the unique id + return "_".join(username.split("_")[:-1]), None + if self.dialect == "bigquery": + # BigQuery uses IAM service accounts that need to be pre-created + # Pre-created GCP service accounts: + # - sqlmesh-test-admin@{project-id}.iam.gserviceaccount.com + # - sqlmesh-test-analyst@{project-id}.iam.gserviceaccount.com + # - sqlmesh-test-etl-user@{project-id}.iam.gserviceaccount.com + # - sqlmesh-test-reader@{project-id}.iam.gserviceaccount.com + # - sqlmesh-test-user@{project-id}.iam.gserviceaccount.com + # - sqlmesh-test-writer@{project-id}.iam.gserviceaccount.com + role_name = ( + username.replace(f"_{self.test_id}", "").replace("test_", "").replace("_", "-") + ) + project_id = self.engine_adapter.get_current_catalog() + service_account = f"sqlmesh-test-{role_name}@{project_id}.iam.gserviceaccount.com" + return f"serviceAccount:{service_account}", None + raise ValueError(f"User creation not supported for dialect: {self.dialect}") + + def _create_user_or_role(self, username: str, password: t.Optional[str] = None) -> str: + username, create_user_sql = self._get_create_user_or_role(username, password) + if create_user_sql: + self.engine_adapter.execute(create_user_sql) + return username + + @contextmanager + def create_users_or_roles(self, *role_names: str) -> t.Iterator[t.Dict[str, str]]: + created_users = [] + roles = {} + + try: + for role_name in role_names: + user_name = normalize_identifiers( + self.add_test_suffix(f"test_{role_name}"), dialect=self.dialect + ).sql(dialect=self.dialect) + password = random_id() + if self.dialect == "redshift": + password += ( + "A" # redshift requires passwords to have at least one uppercase letter + ) + user_name = self._create_user_or_role(user_name, password) + created_users.append(user_name) + roles[role_name] = user_name + + yield roles + + finally: + for user_name in created_users: + self._cleanup_user_or_role(user_name) + + def get_select_privilege(self) -> str: + if self.dialect == "bigquery": + return "roles/bigquery.dataViewer" + return "SELECT" + + def get_insert_privilege(self) -> str: + if self.dialect == "databricks": + # This would really be "MODIFY" but for the purposes of having this be unique from UPDATE + # we return "MANAGE" instead + return "MANAGE" + if self.dialect == "bigquery": + return "roles/bigquery.dataEditor" + return "INSERT" + + def get_update_privilege(self) -> str: + if self.dialect == "databricks": + return "MODIFY" + if self.dialect == "bigquery": + return "roles/bigquery.dataOwner" + return "UPDATE" + + def _cleanup_user_or_role(self, user_name: str) -> None: + """Helper function to clean up a user/role and all their dependencies.""" + try: + if self.dialect in ["postgres", "redshift"]: + self.engine_adapter.execute(f""" + SELECT pg_terminate_backend(pid) + FROM pg_stat_activity + WHERE usename = '{user_name}' AND pid <> pg_backend_pid() + """) + self.engine_adapter.execute(f'DROP OWNED BY "{user_name}"') + self.engine_adapter.execute(f'DROP USER IF EXISTS "{user_name}"') + elif self.dialect == "snowflake": + self.engine_adapter.execute(f"DROP ROLE IF EXISTS {user_name}") + elif self.dialect in ["databricks", "bigquery"]: + # For Databricks and BigQuery, we use pre-created accounts that should not be deleted + pass + except Exception: + pass + def wait_until(fn: t.Callable[..., bool], attempts=3, wait=5) -> None: current_attempt = 0 diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index 
995875c778..5e976f8dd5 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -4027,3 +4027,209 @@ def test_unicode_characters(ctx: TestContext, tmp_path: Path): table_results = ctx.get_metadata_results(schema) assert len(table_results.tables) == 1 assert table_results.tables[0].lower().startswith(schema_name.lower() + "________") + + +def test_sync_grants_config(ctx: TestContext) -> None: + if not ctx.engine_adapter.SUPPORTS_GRANTS: + pytest.skip( + f"Skipping Test since engine adapter {ctx.engine_adapter.dialect} doesn't support grants" + ) + + table = ctx.table("sync_grants_integration") + select_privilege = ctx.get_select_privilege() + insert_privilege = ctx.get_insert_privilege() + update_privilege = ctx.get_update_privilege() + with ctx.create_users_or_roles("reader", "writer", "admin") as roles: + ctx.engine_adapter.create_table(table, {"id": exp.DataType.build("INT")}) + + initial_grants = { + select_privilege: [roles["reader"]], + insert_privilege: [roles["writer"]], + } + ctx.engine_adapter.sync_grants_config(table, initial_grants) + + current_grants = ctx.engine_adapter._get_current_grants_config(table) + assert set(current_grants.get(select_privilege, [])) == {roles["reader"]} + assert set(current_grants.get(insert_privilege, [])) == {roles["writer"]} + + target_grants = { + select_privilege: [roles["writer"], roles["admin"]], + update_privilege: [roles["admin"]], + } + ctx.engine_adapter.sync_grants_config(table, target_grants) + + synced_grants = ctx.engine_adapter._get_current_grants_config(table) + assert set(synced_grants.get(select_privilege, [])) == { + roles["writer"], + roles["admin"], + } + assert set(synced_grants.get(update_privilege, [])) == {roles["admin"]} + assert synced_grants.get(insert_privilege, []) == [] + + +def test_grants_sync_empty_config(ctx: TestContext): + if not ctx.engine_adapter.SUPPORTS_GRANTS: + pytest.skip( + f"Skipping Test since engine adapter {ctx.engine_adapter.dialect} doesn't support grants" + ) + + table = ctx.table("grants_empty_test") + select_privilege = ctx.get_select_privilege() + insert_privilege = ctx.get_insert_privilege() + with ctx.create_users_or_roles("user") as roles: + ctx.engine_adapter.create_table(table, {"id": exp.DataType.build("INT")}) + + initial_grants = { + select_privilege: [roles["user"]], + insert_privilege: [roles["user"]], + } + ctx.engine_adapter.sync_grants_config(table, initial_grants) + + initial_current_grants = ctx.engine_adapter._get_current_grants_config(table) + assert roles["user"] in initial_current_grants.get(select_privilege, []) + assert roles["user"] in initial_current_grants.get(insert_privilege, []) + + ctx.engine_adapter.sync_grants_config(table, {}) + + final_grants = ctx.engine_adapter._get_current_grants_config(table) + assert final_grants == {} + + +def test_grants_case_insensitive_grantees(ctx: TestContext): + if not ctx.engine_adapter.SUPPORTS_GRANTS: + pytest.skip( + f"Skipping Test since engine adapter {ctx.engine_adapter.dialect} doesn't support grants" + ) + + with ctx.create_users_or_roles("reader", "writer") as roles: + table = ctx.table("grants_quoted_test") + ctx.engine_adapter.create_table(table, {"id": exp.DataType.build("INT")}) + + reader = roles["reader"] + writer = roles["writer"] + select_privilege = ctx.get_select_privilege() + + if ctx.dialect == "bigquery": + # BigQuery labels are case sensitive, e.g. 
serviceAccount + label, grantee = writer.split(":", 1) + upper_case_writer = f"{label}:{grantee.upper()}" + else: + upper_case_writer = writer.upper() + + grants_config = {select_privilege: [reader, upper_case_writer]} + ctx.engine_adapter.sync_grants_config(table, grants_config) + + # Grantees are still in lowercase + current_grants = ctx.engine_adapter._get_current_grants_config(table) + assert reader in current_grants.get(select_privilege, []) + assert writer in current_grants.get(select_privilege, []) + + # Revoke writer + grants_config = {select_privilege: [reader.upper()]} + ctx.engine_adapter.sync_grants_config(table, grants_config) + + current_grants = ctx.engine_adapter._get_current_grants_config(table) + assert reader in current_grants.get(select_privilege, []) + assert writer not in current_grants.get(select_privilege, []) + + +def test_grants_plan(ctx: TestContext, tmp_path: Path): + if not ctx.engine_adapter.SUPPORTS_GRANTS: + pytest.skip( + f"Skipping Test since engine adapter {ctx.engine_adapter.dialect} doesn't support grants" + ) + + table = ctx.table("grant_model").sql(dialect="duckdb") + select_privilege = ctx.get_select_privilege() + insert_privilege = ctx.get_insert_privilege() + with ctx.create_users_or_roles("analyst", "etl_user") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + model_def = f""" + MODEL ( + name {table}, + kind FULL, + grants ( + '{select_privilege}' = ['{roles["analyst"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, CURRENT_DATE as created_date + """ + + (tmp_path / "models" / "grant_model.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path) + plan_result = context.plan(auto_apply=True, no_prompts=True) + + assert len(plan_result.new_snapshots) == 1 + snapshot = plan_result.new_snapshots[0] + + # Physical layer w/ grants + table_name = snapshot.table_name() + view_name = snapshot.qualified_view_name.for_environment( + plan_result.environment_naming_info, dialect=ctx.dialect + ) + current_grants = ctx.engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=ctx.dialect) + ) + assert current_grants == {select_privilege: [roles["analyst"]]} + + # Virtual layer (view) w/ grants + virtual_grants = ctx.engine_adapter._get_current_grants_config( + exp.to_table(view_name, dialect=ctx.dialect) + ) + assert virtual_grants == {select_privilege: [roles["analyst"]]} + + # Update model with query change and new grants + updated_model = load_sql_based_model( + d.parse( + f""" + MODEL ( + name {table}, + kind FULL, + grants ( + '{select_privilege}' = ['{roles["analyst"]}', '{roles["etl_user"]}'], + '{insert_privilege}' = ['{roles["etl_user"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, CURRENT_DATE as created_date, 'v2' as version + """, + default_dialect=context.default_dialect, + ), + dialect=context.default_dialect, + ) + context.upsert_model(updated_model) + + plan = context.plan(auto_apply=True, no_prompts=True) + plan_result = PlanResults.create(plan, ctx, ctx.add_test_suffix(TEST_SCHEMA)) + assert len(plan_result.plan.directly_modified) == 1 + + new_snapshot = plan_result.snapshot_for(updated_model) + assert new_snapshot is not None + + new_table_name = new_snapshot.table_name() + final_grants = ctx.engine_adapter._get_current_grants_config( + exp.to_table(new_table_name, dialect=ctx.dialect) + ) + expected_final_grants = { + select_privilege: [roles["analyst"], roles["etl_user"]], + insert_privilege: [roles["etl_user"]], + } + assert set(final_grants.get(select_privilege,
[])) == set( + expected_final_grants[select_privilege] + ) + assert final_grants.get(insert_privilege, []) == expected_final_grants[insert_privilege] + + # Virtual layer should also have the updated grants + updated_virtual_grants = ctx.engine_adapter._get_current_grants_config( + exp.to_table(view_name, dialect=ctx.dialect) + ) + assert set(updated_virtual_grants.get(select_privilege, [])) == set( + expected_final_grants[select_privilege] + ) + assert ( + updated_virtual_grants.get(insert_privilege, []) + == expected_final_grants[insert_privilege] + ) diff --git a/tests/core/engine_adapter/integration/test_integration_postgres.py b/tests/core/engine_adapter/integration/test_integration_postgres.py index 26b8cbda42..f236fdebce 100644 --- a/tests/core/engine_adapter/integration/test_integration_postgres.py +++ b/tests/core/engine_adapter/integration/test_integration_postgres.py @@ -1,9 +1,11 @@ import typing as t +from contextlib import contextmanager import pytest from pytest import FixtureRequest from pathlib import Path from sqlmesh.core.engine_adapter import PostgresEngineAdapter from sqlmesh.core.config import Config, DuckDBConnectionConfig +from sqlmesh.core.config.common import VirtualEnvironmentMode from tests.core.engine_adapter.integration import TestContext import time_machine from datetime import timedelta @@ -12,6 +14,7 @@ from sqlmesh.core.context import Context from sqlmesh.core.state_sync import CachingStateSync, EngineAdapterStateSync from sqlmesh.core.snapshot.definition import SnapshotId +from sqlmesh.utils import random_id from tests.core.engine_adapter.integration import ( TestContext, @@ -22,6 +25,87 @@ ) +def _cleanup_user(engine_adapter: PostgresEngineAdapter, user_name: str) -> None: + """Helper function to clean up a PostgreSQL user and all their dependencies.""" + try: + engine_adapter.execute(f""" + SELECT pg_terminate_backend(pid) + FROM pg_stat_activity + WHERE usename = '{user_name}' AND pid <> pg_backend_pid() + """) + engine_adapter.execute(f'DROP OWNED BY "{user_name}"') + engine_adapter.execute(f'DROP USER IF EXISTS "{user_name}"') + except Exception: + pass + + +@contextmanager +def create_users( + engine_adapter: PostgresEngineAdapter, *role_names: str +) -> t.Iterator[t.Dict[str, t.Dict[str, str]]]: + """Create a set of Postgres users and yield their credentials.""" + created_users = [] + roles = {} + + try: + for role_name in role_names: + user_name = f"test_{role_name}" + _cleanup_user(engine_adapter, user_name) + + for role_name in role_names: + user_name = f"test_{role_name}" + password = random_id() + engine_adapter.execute(f"CREATE USER \"{user_name}\" WITH PASSWORD '{password}'") + engine_adapter.execute(f'GRANT USAGE ON SCHEMA public TO "{user_name}"') + created_users.append(user_name) + roles[role_name] = {"username": user_name, "password": password} + + yield roles + + finally: + for user_name in created_users: + _cleanup_user(engine_adapter, user_name) + + +def create_engine_adapter_for_role( + role_credentials: t.Dict[str, str], ctx: TestContext, config: Config +) -> PostgresEngineAdapter: + """Create a PostgreSQL adapter for a specific role to test authentication and permissions.""" + from sqlmesh.core.config import PostgresConnectionConfig + + gateway = ctx.gateway + assert gateway in config.gateways + connection_config = config.gateways[gateway].connection + assert isinstance(connection_config, PostgresConnectionConfig) + + role_connection_config = PostgresConnectionConfig( + host=connection_config.host, + port=connection_config.port, + 
database=connection_config.database, + user=role_credentials["username"], + password=role_credentials["password"], + keepalives_idle=connection_config.keepalives_idle, + connect_timeout=connection_config.connect_timeout, + role=connection_config.role, + sslmode=connection_config.sslmode, + application_name=connection_config.application_name, + ) + + return t.cast(PostgresEngineAdapter, role_connection_config.create_engine_adapter()) + + +@contextmanager +def engine_adapter_for_role( + role_credentials: t.Dict[str, str], ctx: TestContext, config: Config +) -> t.Iterator[PostgresEngineAdapter]: + """Context manager that yields a PostgresEngineAdapter and ensures it is closed.""" + adapter = create_engine_adapter_for_role(role_credentials, ctx, config) + try: + yield adapter + finally: + adapter.close() + + @pytest.fixture(params=list(generate_pytest_params(ENGINES_BY_NAME["postgres"]))) def ctx( request: FixtureRequest, @@ -286,3 +370,857 @@ def _mutate_config(gateway: str, config: Config): assert after_objects.views == [ exp.to_table(model_b_prod_snapshot.table_name()).text("this") ] + + +# Grants Integration Tests + + +def test_grants_plan_target_layer_physical_only( + engine_adapter: PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "reader") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + model_def = """ + MODEL ( + name test_schema.physical_grants_model, + kind FULL, + grants ( + 'select' = ['test_reader'] + ), + grants_target_layer 'physical' + ); + SELECT 1 as id, 'physical_only' as layer + """ + + (tmp_path / "models" / "physical_grants_model.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path) + plan_result = context.plan(auto_apply=True, no_prompts=True) + + assert len(plan_result.new_snapshots) == 1 + snapshot = plan_result.new_snapshots[0] + physical_table_name = snapshot.table_name() + + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert physical_grants == {"SELECT": [roles["reader"]["username"]]} + + # Virtual layer should have no grants + virtual_view_name = f"test_schema.physical_grants_model" + virtual_grants = engine_adapter._get_current_grants_config( + exp.to_table(virtual_view_name, dialect=engine_adapter.dialect) + ) + assert virtual_grants == {} + + +def test_grants_plan_target_layer_virtual_only( + engine_adapter: PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "viewer") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + model_def = """ + MODEL ( + name test_schema.virtual_grants_model, + kind FULL, + grants ( + 'select' = ['test_viewer'] + ), + grants_target_layer 'virtual' + ); + SELECT 1 as id, 'virtual_only' as layer + """ + + (tmp_path / "models" / "virtual_grants_model.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path) + plan_result = context.plan(auto_apply=True, no_prompts=True) + + assert len(plan_result.new_snapshots) == 1 + snapshot = plan_result.new_snapshots[0] + physical_table_name = snapshot.table_name() + + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + # Physical table should have no grants + assert physical_grants == {} + + virtual_view_name = f"test_schema.virtual_grants_model" + virtual_grants = engine_adapter._get_current_grants_config( + exp.to_table(virtual_view_name, dialect=engine_adapter.dialect) + ) + assert 
virtual_grants == {"SELECT": [roles["viewer"]["username"]]} + + +def test_grants_plan_full_refresh_model_via_replace( + engine_adapter: PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "reader") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + (tmp_path / "models" / "full_refresh_model.sql").write_text( + f""" + MODEL ( + name test_schema.full_refresh_model, + kind FULL, + grants ( + 'SELECT' = ['{roles["reader"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, 'test_data' as status + """ + ) + + context = ctx.create_context(path=tmp_path) + + plan_result = context.plan( + "dev", # this triggers _replace_query_for_model for FULL models + auto_apply=True, + no_prompts=True, + ) + + assert len(plan_result.new_snapshots) == 1 + snapshot = plan_result.new_snapshots[0] + table_name = snapshot.table_name() + + # Physical table + grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert grants == {"SELECT": [roles["reader"]["username"]]} + + # Virtual view + dev_view_name = "test_schema__dev.full_refresh_model" + dev_grants = engine_adapter._get_current_grants_config( + exp.to_table(dev_view_name, dialect=engine_adapter.dialect) + ) + assert dev_grants == {"SELECT": [roles["reader"]["username"]]} + + +def test_grants_plan_incremental_model( + engine_adapter: PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "reader", "writer") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + model_name = "incr_model" + model_definition = f""" + MODEL ( + name test_schema.{model_name}, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ts + ), + grants ( + 'SELECT' = ['{roles["reader"]["username"]}'], + 'INSERT' = ['{roles["writer"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, @start_ds::timestamp as ts, 'data' as value + """ + + (tmp_path / "models" / f"{model_name}.sql").write_text(model_definition) + + context = ctx.create_context(path=tmp_path) + + plan_result = context.plan( + "dev", start="2020-01-01", end="2020-01-01", auto_apply=True, no_prompts=True + ) + assert len(plan_result.new_snapshots) == 1 + + snapshot = plan_result.new_snapshots[0] + table_name = snapshot.table_name() + + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert physical_grants.get("SELECT", []) == [roles["reader"]["username"]] + assert physical_grants.get("INSERT", []) == [roles["writer"]["username"]] + + view_name = f"test_schema__dev.{model_name}" + view_grants = engine_adapter._get_current_grants_config( + exp.to_table(view_name, dialect=engine_adapter.dialect) + ) + assert view_grants.get("SELECT", []) == [roles["reader"]["username"]] + assert view_grants.get("INSERT", []) == [roles["writer"]["username"]] + + +def test_grants_plan_clone_environment( + engine_adapter: PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "reader") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + (tmp_path / "models" / "clone_model.sql").write_text( + f""" + MODEL ( + name test_schema.clone_model, + kind FULL, + grants ( + 'SELECT' = ['{roles["reader"]["username"]}'] + ), + grants_target_layer 'all' + ); + + SELECT 1 as id, 'data' as value + """ + ) + + context = ctx.create_context(path=tmp_path) + prod_plan_result = context.plan("prod", auto_apply=True, no_prompts=True) + + assert 
len(prod_plan_result.new_snapshots) == 1 + prod_snapshot = prod_plan_result.new_snapshots[0] + prod_table_name = prod_snapshot.table_name() + + # Prod physical table grants + prod_grants = engine_adapter._get_current_grants_config( + exp.to_table(prod_table_name, dialect=engine_adapter.dialect) + ) + assert prod_grants == {"SELECT": [roles["reader"]["username"]]} + + # Prod virtual view grants + prod_view_name = f"test_schema.clone_model" + prod_view_grants = engine_adapter._get_current_grants_config( + exp.to_table(prod_view_name, dialect=engine_adapter.dialect) + ) + assert prod_view_grants == {"SELECT": [roles["reader"]["username"]]} + + # Create dev environment (cloned from prod) + context.plan("dev", auto_apply=True, no_prompts=True, include_unmodified=True) + + # Physical table grants should remain unchanged + prod_grants_after_clone = engine_adapter._get_current_grants_config( + exp.to_table(prod_table_name, dialect=engine_adapter.dialect) + ) + assert prod_grants_after_clone == prod_grants + + # Dev virtual view should have the same grants as prod + dev_view_name = f"test_schema__dev.clone_model" + dev_view_grants = engine_adapter._get_current_grants_config( + exp.to_table(dev_view_name, dialect=engine_adapter.dialect) + ) + assert dev_view_grants == prod_grants + + +@pytest.mark.parametrize( + "model_name,kind_config,query,extra_config,needs_seed", + [ + ( + "grants_full", + "FULL", + "SELECT 1 as id, 'unchanged_query' as data", + "", + False, + ), + ( + "grants_view", + "VIEW", + "SELECT 1 as id, 'unchanged_query' as data", + "", + False, + ), + ( + "grants_incr_time", + "INCREMENTAL_BY_TIME_RANGE (time_column event_date)", + "SELECT '2025-09-01'::date as event_date, 1 as id, 'unchanged_query' as data", + "start '2025-09-01',", + False, + ), + ( + "grants_seed", + "SEED (path '../seeds/grants_seed.csv')", + "", + "", + True, + ), + ], +) +def test_grants_metadata_only_changes( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + model_name: str, + kind_config: str, + query: str, + extra_config: str, + needs_seed: bool, +): + with create_users(engine_adapter, "reader", "writer", "admin") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + if needs_seed: + (tmp_path / "seeds").mkdir(exist_ok=True) + csv_content = "id,data\\n1,unchanged_query" + (tmp_path / "seeds" / f"{model_name}.csv").write_text(csv_content) + + initial_model_def = f""" + MODEL ( + name test_schema.{model_name}, + kind {kind_config}, + {extra_config} + grants ( + 'select' = ['{roles["reader"]["username"]}'] + ), + grants_target_layer 'all' + ); + {query} + """ + (tmp_path / "models" / f"{model_name}.sql").write_text(initial_model_def) + + context = ctx.create_context(path=tmp_path) + initial_plan_result = context.plan(auto_apply=True, no_prompts=True) + + assert len(initial_plan_result.new_snapshots) == 1 + initial_snapshot = initial_plan_result.new_snapshots[0] + + physical_table_name = initial_snapshot.table_name() + virtual_view_name = f"test_schema.{model_name}" + + initial_physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert initial_physical_grants == {"SELECT": [roles["reader"]["username"]]} + + initial_virtual_grants = engine_adapter._get_current_grants_config( + exp.to_table(virtual_view_name, dialect=engine_adapter.dialect) + ) + assert initial_virtual_grants == {"SELECT": [roles["reader"]["username"]]} + + # Metadata-only change: update grants only using upsert_model + 
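        # upsert_model with a keyword override below replaces only the grants field on
        # the loaded model; the query is untouched, so the next plan should be classified
        # as a metadata-only change (seeds are the exception handled further down).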
existing_model = context.get_model(f"test_schema.{model_name}") + context.upsert_model( + existing_model, + grants={ + "select": [roles["writer"]["username"], roles["admin"]["username"]], + "insert": [roles["admin"]["username"]], + }, + ) + second_plan_result = context.plan(auto_apply=True, no_prompts=True) + + expected_grants = { + "SELECT": [roles["writer"]["username"], roles["admin"]["username"]], + "INSERT": [roles["admin"]["username"]], + } + + # For seed models, grant changes rebuild the entire table, so it will create a new physical table + if model_name == "grants_seed" and second_plan_result.new_snapshots: + updated_snapshot = second_plan_result.new_snapshots[0] + physical_table_name = updated_snapshot.table_name() + + updated_physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert set(updated_physical_grants.get("SELECT", [])) == set(expected_grants["SELECT"]) + assert updated_physical_grants.get("INSERT", []) == expected_grants["INSERT"] + + updated_virtual_grants = engine_adapter._get_current_grants_config( + exp.to_table(virtual_view_name, dialect=engine_adapter.dialect) + ) + assert set(updated_virtual_grants.get("SELECT", [])) == set(expected_grants["SELECT"]) + assert updated_virtual_grants.get("INSERT", []) == expected_grants["INSERT"] + + +def _vde_dev_only_config(gateway: str, config: Config) -> None: + config.virtual_environment_mode = VirtualEnvironmentMode.DEV_ONLY + + +@pytest.mark.parametrize( + "grants_target_layer,model_kind", + [ + ("virtual", "FULL"), + ("physical", "FULL"), + ("all", "FULL"), + ("virtual", "VIEW"), + ("physical", "VIEW"), + ], +) +def test_grants_target_layer_with_vde_dev_only( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + grants_target_layer: str, + model_kind: str, +): + with create_users(engine_adapter, "reader", "writer") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + if model_kind == "VIEW": + grants_config = ( + f"'SELECT' = ['{roles['reader']['username']}', '{roles['writer']['username']}']" + ) + else: + grants_config = f""" + 'SELECT' = ['{roles["reader"]["username"]}', '{roles["writer"]["username"]}'], + 'INSERT' = ['{roles["writer"]["username"]}'] + """.strip() + + model_def = f""" + MODEL ( + name test_schema.vde_model_{grants_target_layer}_{model_kind.lower()}, + kind {model_kind}, + grants ( + {grants_config} + ), + grants_target_layer '{grants_target_layer}' + ); + SELECT 1 as id, '{grants_target_layer}_{model_kind}' as test_type + """ + ( + tmp_path / "models" / f"vde_model_{grants_target_layer}_{model_kind.lower()}.sql" + ).write_text(model_def) + + context = ctx.create_context(path=tmp_path, config_mutator=_vde_dev_only_config) + context.plan("prod", auto_apply=True, no_prompts=True) + + table_name = f"test_schema.vde_model_{grants_target_layer}_{model_kind.lower()}" + + # In VDE dev_only mode, VIEWs are created as actual views + assert context.engine_adapter.table_exists(table_name) + + grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert roles["reader"]["username"] in grants.get("SELECT", []) + assert roles["writer"]["username"] in grants.get("SELECT", []) + + if model_kind != "VIEW": + assert roles["writer"]["username"] in grants.get("INSERT", []) + + +def test_grants_incremental_model_with_vde_dev_only( + engine_adapter: PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "etl", 
"analyst") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + model_def = f""" + MODEL ( + name test_schema.vde_incremental_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column event_date + ), + grants ( + 'SELECT' = ['{roles["analyst"]["username"]}'], + 'INSERT' = ['{roles["etl"]["username"]}'] + ), + grants_target_layer 'virtual' + ); + SELECT + 1 as id, + @start_date::date as event_date, + 'event' as event_type + """ + (tmp_path / "models" / "vde_incremental_model.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path, config_mutator=_vde_dev_only_config) + context.plan("prod", auto_apply=True, no_prompts=True) + + prod_table = "test_schema.vde_incremental_model" + prod_grants = engine_adapter._get_current_grants_config( + exp.to_table(prod_table, dialect=engine_adapter.dialect) + ) + assert roles["analyst"]["username"] in prod_grants.get("SELECT", []) + assert roles["etl"]["username"] in prod_grants.get("INSERT", []) + + +@pytest.mark.parametrize( + "change_type,initial_query,updated_query,expect_schema_change", + [ + # Metadata-only change (grants only) + ( + "metadata_only", + "SELECT 1 as id, 'same' as status", + "SELECT 1 as id, 'same' as status", + False, + ), + # Breaking change only + ( + "breaking_only", + "SELECT 1 as id, 'initial' as status, 100 as amount", + "SELECT 1 as id, 'updated' as status", # Removed column + True, + ), + # Both metadata and breaking changes + ( + "metadata_and_breaking", + "SELECT 1 as id, 'initial' as status, 100 as amount", + "SELECT 2 as id, 'changed' as new_status", # Different schema + True, + ), + ], +) +def test_grants_changes_with_vde_dev_only( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + change_type: str, + initial_query: str, + updated_query: str, + expect_schema_change: bool, +): + with create_users(engine_adapter, "user1", "user2", "user3") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + model_path = tmp_path / "models" / f"vde_changes_{change_type}.sql" + + initial_model = f""" + MODEL ( + name test_schema.vde_changes_{change_type}, + kind FULL, + grants ( + 'SELECT' = ['{roles["user1"]["username"]}'] + ), + grants_target_layer 'virtual' + ); + {initial_query} + """ + model_path.write_text(initial_model) + + context = ctx.create_context(path=tmp_path, config_mutator=_vde_dev_only_config) + context.plan("prod", auto_apply=True, no_prompts=True) + + table_name = f"test_schema.vde_changes_{change_type}" + initial_grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert roles["user1"]["username"] in initial_grants.get("SELECT", []) + assert roles["user2"]["username"] not in initial_grants.get("SELECT", []) + + # Update model with new grants and potentially new query + updated_model = f""" + MODEL ( + name test_schema.vde_changes_{change_type}, + kind FULL, + grants ( + 'SELECT' = ['{roles["user1"]["username"]}', '{roles["user2"]["username"]}', '{roles["user3"]["username"]}'], + 'INSERT' = ['{roles["user3"]["username"]}'] + ), + grants_target_layer 'virtual' + ); + {updated_query} + """ + model_path.write_text(updated_model) + + # Get initial table columns + initial_columns = set( + col[0] + for col in engine_adapter.fetchall( + f"SELECT column_name FROM information_schema.columns WHERE table_schema = 'test_schema' AND table_name = 'vde_changes_{change_type}'" + ) + ) + + context.load() + plan = context.plan("prod", auto_apply=True, no_prompts=True) + + assert len(plan.new_snapshots) == 1 + + 
current_columns = set( + col[0] + for col in engine_adapter.fetchall( + f"SELECT column_name FROM information_schema.columns WHERE table_schema = 'test_schema' AND table_name = 'vde_changes_{change_type}'" + ) + ) + + if expect_schema_change: + assert current_columns != initial_columns + else: + # For metadata-only changes, schema should be the same + assert current_columns == initial_columns + + # Grants should be updated in all cases + updated_grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert roles["user1"]["username"] in updated_grants.get("SELECT", []) + assert roles["user2"]["username"] in updated_grants.get("SELECT", []) + assert roles["user3"]["username"] in updated_grants.get("SELECT", []) + assert roles["user3"]["username"] in updated_grants.get("INSERT", []) + + +@pytest.mark.parametrize( + "grants_target_layer,environment", + [ + ("virtual", "prod"), + ("virtual", "dev"), + ("physical", "prod"), + ("physical", "staging"), + ("all", "prod"), + ("all", "preview"), + ], +) +def test_grants_target_layer_plan_env_with_vde_dev_only( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + grants_target_layer: str, + environment: str, +): + with create_users(engine_adapter, "grantee") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + model_def = f""" + MODEL ( + name test_schema.vde_layer_model, + kind FULL, + grants ( + 'SELECT' = ['{roles["grantee"]["username"]}'] + ), + grants_target_layer '{grants_target_layer}' + ); + SELECT 1 as id, '{environment}' as env, '{grants_target_layer}' as layer + """ + (tmp_path / "models" / "vde_layer_model.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path, config_mutator=_vde_dev_only_config) + + if environment == "prod": + context.plan("prod", auto_apply=True, no_prompts=True) + table_name = "test_schema.vde_layer_model" + grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert roles["grantee"]["username"] in grants.get("SELECT", []) + else: + context.plan(environment, auto_apply=True, no_prompts=True, include_unmodified=True) + virtual_view = f"test_schema__{environment}.vde_layer_model" + assert context.engine_adapter.table_exists(virtual_view) + virtual_grants = engine_adapter._get_current_grants_config( + exp.to_table(virtual_view, dialect=engine_adapter.dialect) + ) + + data_objects = engine_adapter.get_data_objects("sqlmesh__test_schema") + physical_tables = [ + obj + for obj in data_objects + if "vde_layer_model" in obj.name + and obj.name.endswith("__dev") # Always __dev suffix in VDE dev_only + and "TABLE" in str(obj.type).upper() + ] + + if grants_target_layer == "virtual": + # Virtual layer should have grants, physical should not + assert roles["grantee"]["username"] in virtual_grants.get("SELECT", []) + + assert len(physical_tables) > 0 + for physical_table in physical_tables: + physical_table_name = f"sqlmesh__test_schema.{physical_table.name}" + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert roles["grantee"]["username"] not in physical_grants.get("SELECT", []) + + elif grants_target_layer == "physical": + # Virtual layer should not have grants, physical should + assert roles["grantee"]["username"] not in virtual_grants.get("SELECT", []) + + assert len(physical_tables) > 0 + for physical_table in physical_tables: + physical_table_name = 
f"sqlmesh__test_schema.{physical_table.name}" + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert roles["grantee"]["username"] in physical_grants.get("SELECT", []) + + else: # grants_target_layer == "all" + # Both layers should have grants + assert roles["grantee"]["username"] in virtual_grants.get("SELECT", []) + assert len(physical_tables) > 0 + for physical_table in physical_tables: + physical_table_name = f"sqlmesh__test_schema.{physical_table.name}" + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert roles["grantee"]["username"] in physical_grants.get("SELECT", []) + + +@pytest.mark.parametrize( + "model_kind", + [ + "SCD_TYPE_2", + "SCD_TYPE_2_BY_TIME", + ], +) +def test_grants_plan_scd_type_2_models( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + model_kind: str, +): + with create_users(engine_adapter, "reader", "writer", "analyst") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + model_name = "scd_model" + + kind_config = f"{model_kind} (unique_key [id])" + model_definition = f""" + MODEL ( + name test_schema.{model_name}, + kind {kind_config}, + grants ( + 'SELECT' = ['{roles["reader"]["username"]}'], + 'INSERT' = ['{roles["writer"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, 'initial_data' as name, CURRENT_TIMESTAMP as updated_at + """ + (tmp_path / "models" / f"{model_name}.sql").write_text(model_definition) + + context = ctx.create_context(path=tmp_path) + plan_result = context.plan( + "dev", start="2023-01-01", end="2023-01-01", auto_apply=True, no_prompts=True + ) + assert len(plan_result.new_snapshots) == 1 + + current_snapshot = plan_result.new_snapshots[0] + fingerprint_version = current_snapshot.fingerprint.to_version() + physical_table_name = ( + f"sqlmesh__test_schema.test_schema__{model_name}__{fingerprint_version}__dev" + ) + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert physical_grants.get("SELECT", []) == [roles["reader"]["username"]] + assert physical_grants.get("INSERT", []) == [roles["writer"]["username"]] + + view_name = f"test_schema__dev.{model_name}" + view_grants = engine_adapter._get_current_grants_config( + exp.to_table(view_name, dialect=engine_adapter.dialect) + ) + assert view_grants.get("SELECT", []) == [roles["reader"]["username"]] + assert view_grants.get("INSERT", []) == [roles["writer"]["username"]] + + # Data change + updated_model_definition = f""" + MODEL ( + name test_schema.{model_name}, + kind {kind_config}, + grants ( + 'SELECT' = ['{roles["reader"]["username"]}'], + 'INSERT' = ['{roles["writer"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, 'updated_data' as name, CURRENT_TIMESTAMP as updated_at + """ + (tmp_path / "models" / f"{model_name}.sql").write_text(updated_model_definition) + + context.load() + context.plan("dev", start="2023-01-02", end="2023-01-02", auto_apply=True, no_prompts=True) + + snapshot = context.get_snapshot(f"test_schema.{model_name}") + assert snapshot + fingerprint = snapshot.fingerprint.to_version() + table_name = f"sqlmesh__test_schema.test_schema__{model_name}__{fingerprint}__dev" + data_change_grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert 
data_change_grants.get("SELECT", []) == [roles["reader"]["username"]] + assert data_change_grants.get("INSERT", []) == [roles["writer"]["username"]] + + # Data + grants changes + grant_change_model_definition = f""" + MODEL ( + name test_schema.{model_name}, + kind {kind_config}, + grants ( + 'SELECT' = ['{roles["reader"]["username"]}', '{roles["analyst"]["username"]}'], + 'INSERT' = ['{roles["writer"]["username"]}'], + 'UPDATE' = ['{roles["analyst"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, 'grant_changed_data' as name, CURRENT_TIMESTAMP as updated_at + """ + (tmp_path / "models" / f"{model_name}.sql").write_text(grant_change_model_definition) + + context.load() + context.plan("dev", start="2023-01-03", end="2023-01-03", auto_apply=True, no_prompts=True) + + snapshot = context.get_snapshot(f"test_schema.{model_name}") + assert snapshot + fingerprint = snapshot.fingerprint.to_version() + table_name = f"sqlmesh__test_schema.test_schema__{model_name}__{fingerprint}__dev" + final_grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + expected_select_users = {roles["reader"]["username"], roles["analyst"]["username"]} + assert set(final_grants.get("SELECT", [])) == expected_select_users + assert final_grants.get("INSERT", []) == [roles["writer"]["username"]] + assert final_grants.get("UPDATE", []) == [roles["analyst"]["username"]] + + final_view_grants = engine_adapter._get_current_grants_config( + exp.to_table(view_name, dialect=engine_adapter.dialect) + ) + assert set(final_view_grants.get("SELECT", [])) == expected_select_users + assert final_view_grants.get("INSERT", []) == [roles["writer"]["username"]] + assert final_view_grants.get("UPDATE", []) == [roles["analyst"]["username"]] + + +@pytest.mark.parametrize( + "model_kind", + [ + "SCD_TYPE_2", + "SCD_TYPE_2_BY_TIME", + ], +) +def test_grants_plan_scd_type_2_with_vde_dev_only( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + model_kind: str, +): + with create_users(engine_adapter, "etl_user", "analyst") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + model_name = "vde_scd_model" + + model_def = f""" + MODEL ( + name test_schema.{model_name}, + kind {model_kind} (unique_key [customer_id]), + grants ( + 'SELECT' = ['{roles["analyst"]["username"]}'], + 'INSERT' = ['{roles["etl_user"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT + 1 as customer_id, + 'active' as status, + CURRENT_TIMESTAMP as updated_at + """ + (tmp_path / "models" / f"{model_name}.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path, config_mutator=_vde_dev_only_config) + + # Prod + context.plan("prod", auto_apply=True, no_prompts=True) + prod_table = f"test_schema.{model_name}" + prod_grants = engine_adapter._get_current_grants_config( + exp.to_table(prod_table, dialect=engine_adapter.dialect) + ) + assert roles["analyst"]["username"] in prod_grants.get("SELECT", []) + assert roles["etl_user"]["username"] in prod_grants.get("INSERT", []) + + # Dev + context.plan("dev", auto_apply=True, no_prompts=True, include_unmodified=True) + dev_view = f"test_schema__dev.{model_name}" + dev_grants = engine_adapter._get_current_grants_config( + exp.to_table(dev_view, dialect=engine_adapter.dialect) + ) + assert roles["analyst"]["username"] in dev_grants.get("SELECT", []) + assert roles["etl_user"]["username"] in dev_grants.get("INSERT", []) + + snapshot = context.get_snapshot(f"test_schema.{model_name}") + assert 
snapshot + fingerprint_version = snapshot.fingerprint.to_version() + dev_physical_table_name = ( + f"sqlmesh__test_schema.test_schema__{model_name}__{fingerprint_version}__dev" + ) + + dev_physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(dev_physical_table_name, dialect=engine_adapter.dialect) + ) + assert roles["analyst"]["username"] in dev_physical_grants.get("SELECT", []) + assert roles["etl_user"]["username"] in dev_physical_grants.get("INSERT", []) diff --git a/tests/core/engine_adapter/test_base.py b/tests/core/engine_adapter/test_base.py index ba775c0779..2b9bcc665f 100644 --- a/tests/core/engine_adapter/test_base.py +++ b/tests/core/engine_adapter/test_base.py @@ -4065,3 +4065,108 @@ def test_data_object_cache_cleared_on_create_table_like( assert result is not None assert result.name == "target_table" assert mock_get_data_objects.call_count == 2 + + +def test_diff_grants_configs(): + new = {"SELECT": ["u1", "u2"], "INSERT": ["u1"]} + old = {"SELECT": ["u1", "u3"], "update": ["u1"]} + + additions, removals = EngineAdapter._diff_grants_configs(new, old) + + assert additions.get("SELECT") and set(additions["SELECT"]) == {"u2"} + assert removals.get("SELECT") and set(removals["SELECT"]) == {"u3"} + + assert additions.get("INSERT") and set(additions["INSERT"]) == {"u1"} + assert removals.get("update") and set(removals["update"]) == {"u1"} + + for perm, grantees in additions.items(): + assert set(grantees).isdisjoint(set(old.get(perm, []))) + for perm, grantees in removals.items(): + assert set(grantees).isdisjoint(set(new.get(perm, []))) + + +def test_diff_grants_configs_empty_new(): + new = {} + old = {"SELECT": ["u1", "u2"], "INSERT": ["u3"]} + + additions, removals = EngineAdapter._diff_grants_configs(new, old) + + assert additions == {} + assert removals == old + + +def test_diff_grants_configs_empty_old(): + new = {"SELECT": ["u1", "u2"], "INSERT": ["u3"]} + old = {} + + additions, removals = EngineAdapter._diff_grants_configs(new, old) + + assert additions == new + assert removals == {} + + +def test_diff_grants_configs_identical(): + grants = {"SELECT": ["u1", "u2"], "INSERT": ["u3"]} + + additions, removals = EngineAdapter._diff_grants_configs(grants, grants) + + assert additions == {} + assert removals == {} + + +def test_diff_grants_configs_none_configs(): + grants = {"SELECT": ["u1"]} + + additions, removals = EngineAdapter._diff_grants_configs(grants, {}) + assert additions == grants + assert removals == {} + + additions, removals = EngineAdapter._diff_grants_configs({}, grants) + assert additions == {} + assert removals == grants + + additions, removals = EngineAdapter._diff_grants_configs({}, {}) + assert additions == {} + assert removals == {} + + +def test_diff_grants_configs_duplicate_grantees(): + new = {"SELECT": ["u1", "u2", "u1"]} + old = {"SELECT": ["u2", "u3", "u2"]} + + additions, removals = EngineAdapter._diff_grants_configs(new, old) + + assert additions["SELECT"] == ["u1", "u1"] + assert removals["SELECT"] == ["u3"] + + +def test_diff_grants_configs_case_sensitive(): + new = {"select": ["u1"], "SELECT": ["u2"]} + old = {"Select": ["u3"]} + + additions, removals = EngineAdapter._diff_grants_configs(new, old) + + assert set(additions.keys()) == {"select", "SELECT"} + assert set(removals.keys()) == {"Select"} + assert additions["select"] == ["u1"] + assert additions["SELECT"] == ["u2"] + assert removals["Select"] == ["u3"] + + +def test_sync_grants_config_unsupported_engine(make_mocked_engine_adapter: t.Callable): + adapter = 
make_mocked_engine_adapter(EngineAdapter) + adapter.SUPPORTS_GRANTS = False + + relation = exp.to_table("test_table") + grants_config = {"SELECT": ["user1"]} + + with pytest.raises(NotImplementedError, match="Engine does not support grants"): + adapter.sync_grants_config(relation, grants_config) + + +def test_get_current_grants_config_not_implemented(make_mocked_engine_adapter: t.Callable): + adapter = make_mocked_engine_adapter(EngineAdapter) + relation = exp.to_table("test_table") + + with pytest.raises(NotImplementedError): + adapter._get_current_grants_config(relation) diff --git a/tests/core/engine_adapter/test_base_postgres.py b/tests/core/engine_adapter/test_base_postgres.py index df280a9059..f286c47c56 100644 --- a/tests/core/engine_adapter/test_base_postgres.py +++ b/tests/core/engine_adapter/test_base_postgres.py @@ -3,6 +3,7 @@ from unittest.mock import call import pytest +from pytest_mock.plugin import MockerFixture from sqlglot import exp, parse_one from sqlmesh.core.engine_adapter.base_postgres import BasePostgresEngineAdapter @@ -75,3 +76,26 @@ def test_drop_view(make_mocked_engine_adapter: t.Callable): call('DROP VIEW IF EXISTS "db"."view"'), ] ) + + +def test_get_current_schema(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(BasePostgresEngineAdapter) + + fetchone_mock = mocker.patch.object(adapter, "fetchone", return_value=("test_schema",)) + result = adapter._get_current_schema() + + assert result == "test_schema" + fetchone_mock.assert_called_once() + executed_query = fetchone_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="postgres") + assert executed_sql == "SELECT CURRENT_SCHEMA" + + fetchone_mock.reset_mock() + fetchone_mock.return_value = None + result = adapter._get_current_schema() + assert result == "public" + + fetchone_mock.reset_mock() + fetchone_mock.return_value = (None,) # search_path = '' or 'nonexistent_schema' + result = adapter._get_current_schema() + assert result == "public" diff --git a/tests/core/engine_adapter/test_bigquery.py b/tests/core/engine_adapter/test_bigquery.py index f195bbaa2a..047613e47a 100644 --- a/tests/core/engine_adapter/test_bigquery.py +++ b/tests/core/engine_adapter/test_bigquery.py @@ -13,6 +13,7 @@ import sqlmesh.core.dialect as d from sqlmesh.core.engine_adapter import BigQueryEngineAdapter from sqlmesh.core.engine_adapter.bigquery import select_partitions_expr +from sqlmesh.core.engine_adapter.shared import DataObjectType from sqlmesh.core.node import IntervalUnit from sqlmesh.utils import AttributeDict from sqlmesh.utils.errors import SQLMeshError @@ -588,13 +589,14 @@ def _to_sql_calls(execute_mock: t.Any, identify: bool = True) -> t.List[str]: execute_mock = execute_mock.execute output = [] for call in execute_mock.call_args_list: - value = call[0][0] - sql = ( - value.sql(dialect="bigquery", identify=identify) - if isinstance(value, exp.Expression) - else str(value) - ) - output.append(sql) + values = ensure_list(call[0][0]) + for value in values: + sql = ( + value.sql(dialect="bigquery", identify=identify) + if isinstance(value, exp.Expression) + else str(value) + ) + output.append(sql) return output @@ -1213,3 +1215,168 @@ def test_scd_type_2_by_partitioning(adapter: BigQueryEngineAdapter): # Both calls should contain the partition logic (the scd logic is already covered by other tests) assert "PARTITION BY TIMESTAMP_TRUNC(`valid_from`, DAY)" in calls[0] assert "PARTITION BY TIMESTAMP_TRUNC(`valid_from`, DAY)" in calls[1] + + +def 
test_sync_grants_config(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(BigQueryEngineAdapter) + relation = exp.to_table("project.dataset.test_table", dialect="bigquery") + new_grants_config = { + "roles/bigquery.dataViewer": ["user:analyst@example.com", "group:data-team@example.com"], + "roles/bigquery.dataEditor": ["user:admin@example.com"], + } + current_grants = [ + ("roles/bigquery.dataViewer", "user:old_analyst@example.com"), + ("roles/bigquery.admin", "user:old_admin@example.com"), + ] + + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + execute_mock = mocker.patch.object(adapter, "execute") + mocker.patch.object(adapter, "get_current_catalog", return_value="project") + mocker.patch.object(adapter.client, "location", "us-central1") + + mock_dataset = mocker.Mock() + mock_dataset.location = "us-central1" + mocker.patch.object(adapter, "_db_call", return_value=mock_dataset) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="bigquery") + expected_sql = ( + "SELECT privilege_type, grantee FROM `project`.`region-us-central1`.`INFORMATION_SCHEMA.OBJECT_PRIVILEGES` AS OBJECT_PRIVILEGES " + "WHERE object_schema = 'dataset' AND object_name = 'test_table' AND SPLIT(grantee, ':')[OFFSET(1)] <> session_user()" + ) + assert executed_sql == expected_sql + + sql_calls = _to_sql_calls(execute_mock) + + assert len(sql_calls) == 4 + assert ( + "REVOKE `roles/bigquery.dataViewer` ON TABLE `project`.`dataset`.`test_table` FROM 'user:old_analyst@example.com'" + in sql_calls + ) + assert ( + "REVOKE `roles/bigquery.admin` ON TABLE `project`.`dataset`.`test_table` FROM 'user:old_admin@example.com'" + in sql_calls + ) + assert ( + "GRANT `roles/bigquery.dataViewer` ON TABLE `project`.`dataset`.`test_table` TO 'user:analyst@example.com', 'group:data-team@example.com'" + in sql_calls + ) + assert ( + "GRANT `roles/bigquery.dataEditor` ON TABLE `project`.`dataset`.`test_table` TO 'user:admin@example.com'" + in sql_calls + ) + + +def test_sync_grants_config_with_overlaps( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(BigQueryEngineAdapter) + relation = exp.to_table("project.dataset.test_table", dialect="bigquery") + new_grants_config = { + "roles/bigquery.dataViewer": [ + "user:analyst1@example.com", + "user:analyst2@example.com", + "user:analyst3@example.com", + ], + "roles/bigquery.dataEditor": ["user:analyst2@example.com", "user:editor@example.com"], + } + current_grants = [ + ("roles/bigquery.dataViewer", "user:analyst1@example.com"), # Keep + ("roles/bigquery.dataViewer", "user:old_analyst@example.com"), # Remove + ("roles/bigquery.dataEditor", "user:analyst2@example.com"), # Keep + ("roles/bigquery.admin", "user:admin@example.com"), # Remove + ] + + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + execute_mock = mocker.patch.object(adapter, "execute") + mocker.patch.object(adapter, "get_current_catalog", return_value="project") + mocker.patch.object(adapter.client, "location", "us-central1") + + mock_dataset = mocker.Mock() + mock_dataset.location = "us-central1" + mocker.patch.object(adapter, "_db_call", return_value=mock_dataset) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = 
fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="bigquery") + expected_sql = ( + "SELECT privilege_type, grantee FROM `project`.`region-us-central1`.`INFORMATION_SCHEMA.OBJECT_PRIVILEGES` AS OBJECT_PRIVILEGES " + "WHERE object_schema = 'dataset' AND object_name = 'test_table' AND SPLIT(grantee, ':')[OFFSET(1)] <> session_user()" + ) + assert executed_sql == expected_sql + + sql_calls = _to_sql_calls(execute_mock) + + assert len(sql_calls) == 4 + assert ( + "REVOKE `roles/bigquery.dataViewer` ON TABLE `project`.`dataset`.`test_table` FROM 'user:old_analyst@example.com'" + in sql_calls + ) + assert ( + "REVOKE `roles/bigquery.admin` ON TABLE `project`.`dataset`.`test_table` FROM 'user:admin@example.com'" + in sql_calls + ) + assert ( + "GRANT `roles/bigquery.dataViewer` ON TABLE `project`.`dataset`.`test_table` TO 'user:analyst2@example.com', 'user:analyst3@example.com'" + in sql_calls + ) + assert ( + "GRANT `roles/bigquery.dataEditor` ON TABLE `project`.`dataset`.`test_table` TO 'user:editor@example.com'" + in sql_calls + ) + + +@pytest.mark.parametrize( + "table_type, expected_keyword", + [ + (DataObjectType.TABLE, "TABLE"), + (DataObjectType.VIEW, "VIEW"), + (DataObjectType.MATERIALIZED_VIEW, "MATERIALIZED VIEW"), + ], +) +def test_sync_grants_config_object_kind( + make_mocked_engine_adapter: t.Callable, + mocker: MockerFixture, + table_type: DataObjectType, + expected_keyword: str, +) -> None: + adapter = make_mocked_engine_adapter(BigQueryEngineAdapter) + relation = exp.to_table("project.dataset.test_object", dialect="bigquery") + + mocker.patch.object(adapter, "fetchall", return_value=[]) + execute_mock = mocker.patch.object(adapter, "execute") + mocker.patch.object(adapter, "get_current_catalog", return_value="project") + mocker.patch.object(adapter.client, "location", "us-central1") + + mock_dataset = mocker.Mock() + mock_dataset.location = "us-central1" + mocker.patch.object(adapter, "_db_call", return_value=mock_dataset) + + adapter.sync_grants_config( + relation, {"roles/bigquery.dataViewer": ["user:test@example.com"]}, table_type + ) + + executed_exprs = execute_mock.call_args[0][0] + sql_calls = [expr.sql(dialect="bigquery") for expr in executed_exprs] + assert sql_calls == [ + f"GRANT `roles/bigquery.dataViewer` ON {expected_keyword} project.dataset.test_object TO 'user:test@example.com'" + ] + + +def test_sync_grants_config_no_schema( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(BigQueryEngineAdapter) + relation = exp.to_table("test_table", dialect="bigquery") + new_grants_config = { + "roles/bigquery.dataViewer": ["user:analyst@example.com"], + "roles/bigquery.dataEditor": ["user:editor@example.com"], + } + + with pytest.raises(ValueError, match="Table test_table does not have a schema \\(dataset\\)"): + adapter.sync_grants_config(relation, new_grants_config) diff --git a/tests/core/engine_adapter/test_databricks.py b/tests/core/engine_adapter/test_databricks.py index 27988fed39..e4512f11c9 100644 --- a/tests/core/engine_adapter/test_databricks.py +++ b/tests/core/engine_adapter/test_databricks.py @@ -128,17 +128,194 @@ def test_get_current_catalog(mocker: MockFixture, make_mocked_engine_adapter: t. 
assert to_sql_calls(adapter) == ["SELECT CURRENT_CATALOG()"] -def test_get_current_database(mocker: MockFixture, make_mocked_engine_adapter: t.Callable): +def test_get_current_schema(mocker: MockFixture, make_mocked_engine_adapter: t.Callable): mocker.patch( "sqlmesh.core.engine_adapter.databricks.DatabricksEngineAdapter.set_current_catalog" ) adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="test_catalog") adapter.cursor.fetchone.return_value = ("test_database",) - assert adapter.get_current_database() == "test_database" + assert adapter._get_current_schema() == "test_database" assert to_sql_calls(adapter) == ["SELECT CURRENT_DATABASE()"] +def test_sync_grants_config(make_mocked_engine_adapter: t.Callable, mocker: MockFixture): + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="main") + relation = exp.to_table("main.test_schema.test_table", dialect="databricks") + new_grants_config = { + "SELECT": ["group1", "group2"], + "MODIFY": ["writers"], + } + + current_grants = [ + ("SELECT", "legacy"), + ("REFRESH", "stale"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="databricks") + expected_sql = ( + "SELECT privilege_type, grantee FROM main.information_schema.table_privileges " + "WHERE table_catalog = 'main' AND table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER() AND grantee <> CURRENT_USER() AND inherited_from = 'NONE'" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert "GRANT SELECT ON TABLE `main`.`test_schema`.`test_table` TO `group1`" in sql_calls + assert "GRANT SELECT ON TABLE `main`.`test_schema`.`test_table` TO `group2`" in sql_calls + assert "GRANT MODIFY ON TABLE `main`.`test_schema`.`test_table` TO `writers`" in sql_calls + assert "REVOKE SELECT ON TABLE `main`.`test_schema`.`test_table` FROM `legacy`" in sql_calls + assert "REVOKE REFRESH ON TABLE `main`.`test_schema`.`test_table` FROM `stale`" in sql_calls + + +def test_sync_grants_config_with_overlaps( + make_mocked_engine_adapter: t.Callable, mocker: MockFixture +): + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="main") + relation = exp.to_table("main.test_schema.test_table", dialect="databricks") + new_grants_config = { + "SELECT": ["shared", "new_role"], + "MODIFY": ["shared", "writer"], + } + + current_grants = [ + ("SELECT", "shared"), + ("SELECT", "legacy"), + ("MODIFY", "shared"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="databricks") + expected_sql = ( + "SELECT privilege_type, grantee FROM main.information_schema.table_privileges " + "WHERE table_catalog = 'main' AND table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER() AND grantee <> CURRENT_USER() AND inherited_from = 'NONE'" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 3 + + assert "GRANT SELECT ON TABLE `main`.`test_schema`.`test_table` TO `new_role`" in sql_calls + assert "GRANT MODIFY ON 
TABLE `main`.`test_schema`.`test_table` TO `writer`" in sql_calls + assert "REVOKE SELECT ON TABLE `main`.`test_schema`.`test_table` FROM `legacy`" in sql_calls + + +@pytest.mark.parametrize( + "table_type, expected_keyword", + [ + (DataObjectType.TABLE, "TABLE"), + (DataObjectType.VIEW, "VIEW"), + (DataObjectType.MATERIALIZED_VIEW, "MATERIALIZED VIEW"), + (DataObjectType.MANAGED_TABLE, "TABLE"), + ], +) +def test_sync_grants_config_object_kind( + make_mocked_engine_adapter: t.Callable, + mocker: MockFixture, + table_type: DataObjectType, + expected_keyword: str, +) -> None: + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="main") + relation = exp.to_table("main.test_schema.test_object", dialect="databricks") + + mocker.patch.object(adapter, "fetchall", return_value=[]) + + adapter.sync_grants_config(relation, {"SELECT": ["test"]}, table_type) + + sql_calls = to_sql_calls(adapter) + assert sql_calls == [ + f"GRANT SELECT ON {expected_keyword} `main`.`test_schema`.`test_object` TO `test`" + ] + + +def test_sync_grants_config_quotes(make_mocked_engine_adapter: t.Callable, mocker: MockFixture): + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="`test_db`") + relation = exp.to_table("`test_db`.`test_schema`.`test_table`", dialect="databricks") + new_grants_config = { + "SELECT": ["group1", "group2"], + "MODIFY": ["writers"], + } + + current_grants = [ + ("SELECT", "legacy"), + ("REFRESH", "stale"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="databricks") + expected_sql = ( + "SELECT privilege_type, grantee FROM `test_db`.information_schema.table_privileges " + "WHERE table_catalog = 'test_db' AND table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER() AND grantee <> CURRENT_USER() AND inherited_from = 'NONE'" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert "GRANT SELECT ON TABLE `test_db`.`test_schema`.`test_table` TO `group1`" in sql_calls + assert "GRANT SELECT ON TABLE `test_db`.`test_schema`.`test_table` TO `group2`" in sql_calls + assert "GRANT MODIFY ON TABLE `test_db`.`test_schema`.`test_table` TO `writers`" in sql_calls + assert "REVOKE SELECT ON TABLE `test_db`.`test_schema`.`test_table` FROM `legacy`" in sql_calls + assert "REVOKE REFRESH ON TABLE `test_db`.`test_schema`.`test_table` FROM `stale`" in sql_calls + + +def test_sync_grants_config_no_catalog_or_schema( + make_mocked_engine_adapter: t.Callable, mocker: MockFixture +): + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="main_catalog") + relation = exp.to_table("test_table", dialect="databricks") + new_grants_config = { + "SELECT": ["group1", "group2"], + "MODIFY": ["writers"], + } + + current_grants = [ + ("SELECT", "legacy"), + ("REFRESH", "stale"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + mocker.patch.object(adapter, "_get_current_schema", return_value="schema") + mocker.patch.object(adapter, "get_current_catalog", return_value="main_catalog") + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = 
executed_query.sql(dialect="databricks") + expected_sql = ( + "SELECT privilege_type, grantee FROM `main_catalog`.information_schema.table_privileges " + "WHERE table_catalog = 'main_catalog' AND table_schema = 'schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER() AND grantee <> CURRENT_USER() AND inherited_from = 'NONE'" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert "GRANT SELECT ON TABLE `test_table` TO `group1`" in sql_calls + assert "GRANT SELECT ON TABLE `test_table` TO `group2`" in sql_calls + assert "GRANT MODIFY ON TABLE `test_table` TO `writers`" in sql_calls + assert "REVOKE SELECT ON TABLE `test_table` FROM `legacy`" in sql_calls + assert "REVOKE REFRESH ON TABLE `test_table` FROM `stale`" in sql_calls + + def test_insert_overwrite_by_partition_query( make_mocked_engine_adapter: t.Callable, mocker: MockFixture, make_temp_table_name: t.Callable ): diff --git a/tests/core/engine_adapter/test_postgres.py b/tests/core/engine_adapter/test_postgres.py index 6134126a41..ebcdd03f55 100644 --- a/tests/core/engine_adapter/test_postgres.py +++ b/tests/core/engine_adapter/test_postgres.py @@ -177,3 +177,108 @@ def test_server_version(make_mocked_engine_adapter: t.Callable, mocker: MockerFi del adapter.server_version fetchone_mock.return_value = ("15.13 (Debian 15.13-1.pgdg120+1)",) assert adapter.server_version == (15, 13) + + +def test_sync_grants_config(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(PostgresEngineAdapter) + relation = exp.to_table("test_schema.test_table", dialect="postgres") + new_grants_config = {"SELECT": ["user1", "user2"], "INSERT": ["user3"]} + + current_grants = [("SELECT", "old_user"), ("UPDATE", "admin_user")] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="postgres") + + assert executed_sql == ( + "SELECT privilege_type, grantee FROM information_schema.role_table_grants " + "WHERE table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = current_role AND grantee <> current_role" + ) + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 4 + + assert 'GRANT SELECT ON "test_schema"."test_table" TO "user1", "user2"' in sql_calls + assert 'GRANT INSERT ON "test_schema"."test_table" TO "user3"' in sql_calls + assert 'REVOKE SELECT ON "test_schema"."test_table" FROM "old_user"' in sql_calls + assert 'REVOKE UPDATE ON "test_schema"."test_table" FROM "admin_user"' in sql_calls + + +def test_sync_grants_config_with_overlaps( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(PostgresEngineAdapter) + relation = exp.to_table("test_schema.test_table", dialect="postgres") + new_grants_config = {"SELECT": ["user1", "user2", "user3"], "INSERT": ["user2", "user4"]} + + current_grants = [ + ("SELECT", "user1"), + ("SELECT", "user5"), + ("INSERT", "user2"), + ("UPDATE", "user3"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="postgres") + + assert executed_sql == ( + "SELECT 
privilege_type, grantee FROM information_schema.role_table_grants "
+        "WHERE table_schema = 'test_schema' AND table_name = 'test_table' "
+        "AND grantor = current_role AND grantee <> current_role"
+    )
+
+    sql_calls = to_sql_calls(adapter)
+    assert len(sql_calls) == 4
+
+    assert 'GRANT SELECT ON "test_schema"."test_table" TO "user2", "user3"' in sql_calls
+    assert 'GRANT INSERT ON "test_schema"."test_table" TO "user4"' in sql_calls
+    assert 'REVOKE SELECT ON "test_schema"."test_table" FROM "user5"' in sql_calls
+    assert 'REVOKE UPDATE ON "test_schema"."test_table" FROM "user3"' in sql_calls
+
+
+def test_diff_grants_configs(make_mocked_engine_adapter: t.Callable):
+    new_grants = {"select": ["USER1", "USER2"], "insert": ["user3"]}
+    old_grants = {"SELECT": ["user1", "user4"], "UPDATE": ["user5"]}
+
+    adapter = make_mocked_engine_adapter(PostgresEngineAdapter)
+    additions, removals = adapter._diff_grants_configs(new_grants, old_grants)
+
+    assert additions["select"] == ["USER2"]
+    assert additions["insert"] == ["user3"]
+
+    assert removals["SELECT"] == ["user4"]
+    assert removals["UPDATE"] == ["user5"]
+
+
+def test_sync_grants_config_with_default_schema(
+    make_mocked_engine_adapter: t.Callable, mocker: MockerFixture
+):
+    adapter = make_mocked_engine_adapter(PostgresEngineAdapter)
+    relation = exp.to_table("test_table", dialect="postgres")  # No schema
+    new_grants_config = {"SELECT": ["user1"], "INSERT": ["user2"]}
+
+    current_grants = [("UPDATE", "old_user")]
+    fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants)
+    get_schema_mock = mocker.patch.object(adapter, "_get_current_schema", return_value="public")
+
+    adapter.sync_grants_config(relation, new_grants_config)
+
+    get_schema_mock.assert_called_once()
+
+    fetchall_mock.assert_called_once()
+    executed_query = fetchall_mock.call_args[0][0]
+    executed_sql = executed_query.sql(dialect="postgres")
+
+    assert executed_sql == (
+        "SELECT privilege_type, grantee FROM information_schema.role_table_grants "
+        "WHERE table_schema = 'public' AND table_name = 'test_table' "
+        "AND grantor = current_role AND grantee <> current_role"
+    )
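
test_diff_grants_configs above pins a subtlety worth calling out: privilege and grantee matching is case-insensitive ('USER1' under 'select' matches the existing 'user1' under 'SELECT'), while the returned additions and removals preserve the caller's original casing. A sketch of that case-folded diff (a hypothetical standalone helper, not the actual PostgresEngineAdapter._diff_grants_configs implementation):

import typing as t


def diff_grants_case_insensitive(
    new: t.Dict[str, t.List[str]], old: t.Dict[str, t.List[str]]
) -> t.Tuple[t.Dict[str, t.List[str]], t.Dict[str, t.List[str]]]:
    # Fold privileges and grantees to lowercase for comparison only;
    # the returned dicts keep the input casing untouched.
    new_folded = {
        priv.lower(): {grantee.lower() for grantee in grantees}
        for priv, grantees in new.items()
    }
    old_folded = {
        priv.lower(): {grantee.lower() for grantee in grantees}
        for priv, grantees in old.items()
    }
    additions = {
        priv: [g for g in grantees if g.lower() not in old_folded.get(priv.lower(), set())]
        for priv, grantees in new.items()
    }
    removals = {
        priv: [g for g in grantees if g.lower() not in new_folded.get(priv.lower(), set())]
        for priv, grantees in old.items()
    }
    return (
        {p: g for p, g in additions.items() if g},
        {p: g for p, g in removals.items() if g},
    )


additions, removals = diff_grants_case_insensitive(
    {"select": ["USER1", "USER2"], "insert": ["user3"]},
    {"SELECT": ["user1", "user4"], "UPDATE": ["user5"]},
)
assert additions == {"select": ["USER2"], "insert": ["user3"]}
assert removals == {"SELECT": ["user4"], "UPDATE": ["user5"]}
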
diff --git a/tests/core/engine_adapter/test_redshift.py b/tests/core/engine_adapter/test_redshift.py
index c5e3dfff17..5438943556 100644
--- a/tests/core/engine_adapter/test_redshift.py
+++ b/tests/core/engine_adapter/test_redshift.py
@@ -9,7 +9,7 @@ from sqlglot import parse_one
 
 from sqlmesh.core.engine_adapter import RedshiftEngineAdapter
-from sqlmesh.core.engine_adapter.shared import DataObject
+from sqlmesh.core.engine_adapter.shared import DataObject, DataObjectType
 from sqlmesh.utils.errors import SQLMeshError
 from tests.core.engine_adapter import to_sql_calls
 
@@ -83,6 +83,154 @@ def test_varchar_size_workaround(make_mocked_engine_adapter: t.Callable, mocker:
     ]
 
 
+def test_sync_grants_config(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture):
+    adapter = make_mocked_engine_adapter(RedshiftEngineAdapter)
+    relation = exp.to_table("test_schema.test_table", dialect="redshift")
+    new_grants_config = {"SELECT": ["user1", "user2"], "INSERT": ["user3"]}
+
+    current_grants = [("SELECT", "old_user"), ("UPDATE", "legacy_user")]
+    fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants)
+
+    adapter.sync_grants_config(relation, new_grants_config)
+
+    fetchall_mock.assert_called_once()
+    executed_query = fetchall_mock.call_args[0][0]
+    executed_sql = executed_query.sql(dialect="redshift")
+    expected_sql = (
+        "SELECT privilege_type, grantee FROM information_schema.table_privileges "
+        "WHERE table_schema = 'test_schema' AND table_name = 'test_table' "
+        "AND grantor = CURRENT_USER AND grantee <> CURRENT_USER"
+    )
+    assert executed_sql == expected_sql
+
+    sql_calls = to_sql_calls(adapter)
+    assert len(sql_calls) == 4
+    assert 'REVOKE SELECT ON "test_schema"."test_table" FROM "old_user"' in sql_calls
+    assert 'REVOKE UPDATE ON "test_schema"."test_table" FROM "legacy_user"' in sql_calls
+    assert 'GRANT SELECT ON "test_schema"."test_table" TO "user1", "user2"' in sql_calls
+    assert 'GRANT INSERT ON "test_schema"."test_table" TO "user3"' in sql_calls
+
+
+def test_sync_grants_config_with_overlaps(
+    make_mocked_engine_adapter: t.Callable, mocker: MockerFixture
+):
+    adapter = make_mocked_engine_adapter(RedshiftEngineAdapter)
+    relation = exp.to_table("test_schema.test_table", dialect="redshift")
+    new_grants_config = {
+        "SELECT": ["user_shared", "user_new"],
+        "INSERT": ["user_shared", "user_writer"],
+    }
+
+    current_grants = [
+        ("SELECT", "user_shared"),
+        ("SELECT", "user_legacy"),
+        ("INSERT", "user_shared"),
+    ]
+    fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants)
+
+    adapter.sync_grants_config(relation, new_grants_config)
+
+    fetchall_mock.assert_called_once()
+    executed_query = fetchall_mock.call_args[0][0]
+    executed_sql = executed_query.sql(dialect="redshift")
+    expected_sql = (
+        "SELECT privilege_type, grantee FROM information_schema.table_privileges "
+        "WHERE table_schema = 'test_schema' AND table_name = 'test_table' "
+        "AND grantor = CURRENT_USER AND grantee <> CURRENT_USER"
+    )
+    assert executed_sql == expected_sql
+
+    sql_calls = to_sql_calls(adapter)
+    assert len(sql_calls) == 3
+    assert 'REVOKE SELECT ON "test_schema"."test_table" FROM "user_legacy"' in sql_calls
+    assert 'GRANT SELECT ON "test_schema"."test_table" TO "user_new"' in sql_calls
+    assert 'GRANT INSERT ON "test_schema"."test_table" TO "user_writer"' in sql_calls
+
+
+@pytest.mark.parametrize(
+    "table_type",
+    [
+        (DataObjectType.TABLE),
+        (DataObjectType.VIEW),
+        (DataObjectType.MATERIALIZED_VIEW),
+    ],
+)
+def test_sync_grants_config_object_kind(
+    make_mocked_engine_adapter: t.Callable,
+    mocker: MockerFixture,
+    table_type: DataObjectType,
+) -> None:
+    adapter = make_mocked_engine_adapter(RedshiftEngineAdapter)
+    relation = exp.to_table("test_schema.test_object", dialect="redshift")
+
+    mocker.patch.object(adapter, "fetchall", return_value=[])
+
+    adapter.sync_grants_config(relation, {"SELECT": ["user_test"]}, table_type)
+
+    sql_calls = to_sql_calls(adapter)
+    # we don't need to explicitly specify object_type for tables and views
+    assert sql_calls == ['GRANT SELECT ON "test_schema"."test_object" TO "user_test"']
+
+
+def test_sync_grants_config_quotes(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture):
+    adapter = make_mocked_engine_adapter(RedshiftEngineAdapter)
+    relation = exp.to_table('"TestSchema"."TestTable"', dialect="redshift")
+    new_grants_config = {"SELECT": ["user1", "user2"], "INSERT": ["user3"]}
+
+    current_grants = [("SELECT", "user_old"), ("UPDATE", "user_legacy")]
+    fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants)
+
+    adapter.sync_grants_config(relation, new_grants_config)
+
+    fetchall_mock.assert_called_once()
+    executed_query = fetchall_mock.call_args[0][0]
+    executed_sql = executed_query.sql(dialect="redshift")
+    expected_sql = (
+        "SELECT privilege_type, grantee FROM information_schema.table_privileges "
+        "WHERE table_schema = 'TestSchema' 
AND table_name = 'TestTable' " + "AND grantor = CURRENT_USER AND grantee <> CURRENT_USER" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 4 + assert 'REVOKE SELECT ON "TestSchema"."TestTable" FROM "user_old"' in sql_calls + assert 'REVOKE UPDATE ON "TestSchema"."TestTable" FROM "user_legacy"' in sql_calls + assert 'GRANT SELECT ON "TestSchema"."TestTable" TO "user1", "user2"' in sql_calls + assert 'GRANT INSERT ON "TestSchema"."TestTable" TO "user3"' in sql_calls + + +def test_sync_grants_config_no_schema( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(RedshiftEngineAdapter) + relation = exp.to_table("test_table", dialect="redshift") + new_grants_config = {"SELECT": ["user1"], "INSERT": ["user2"]} + + current_grants = [("UPDATE", "user_old")] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + get_schema_mock = mocker.patch.object(adapter, "_get_current_schema", return_value="public") + + adapter.sync_grants_config(relation, new_grants_config) + + get_schema_mock.assert_called_once() + + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="redshift") + expected_sql = ( + "SELECT privilege_type, grantee FROM information_schema.table_privileges " + "WHERE table_schema = 'public' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER AND grantee <> CURRENT_USER" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 3 + assert 'REVOKE UPDATE ON "test_table" FROM "user_old"' in sql_calls + assert 'GRANT SELECT ON "test_table" TO "user1"' in sql_calls + assert 'GRANT INSERT ON "test_table" TO "user2"' in sql_calls + + def test_create_table_from_query_exists_no_if_not_exists( adapter: t.Callable, mocker: MockerFixture ): diff --git a/tests/core/engine_adapter/test_snowflake.py b/tests/core/engine_adapter/test_snowflake.py index ce4d3a886c..60f6d38e5f 100644 --- a/tests/core/engine_adapter/test_snowflake.py +++ b/tests/core/engine_adapter/test_snowflake.py @@ -4,6 +4,7 @@ import pytest from pytest_mock.plugin import MockerFixture from sqlglot import exp, parse_one +from sqlglot.optimizer.normalize_identifiers import normalize_identifiers import sqlmesh.core.dialect as d from sqlmesh.core.dialect import normalize_model_name @@ -245,6 +246,204 @@ def test_multiple_column_comments(make_mocked_engine_adapter: t.Callable, mocker ] +def test_sync_grants_config(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) + relation = normalize_identifiers( + exp.to_table("test_db.test_schema.test_table", dialect="snowflake"), dialect="snowflake" + ) + new_grants_config = {"SELECT": ["ROLE role1", "ROLE role2"], "INSERT": ["ROLE role3"]} + + current_grants = [ + ("SELECT", "ROLE old_role"), + ("UPDATE", "ROLE legacy_role"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="snowflake") + expected_sql = ( + "SELECT privilege_type, grantee FROM TEST_DB.INFORMATION_SCHEMA.TABLE_PRIVILEGES " + "WHERE table_catalog = 'TEST_DB' AND table_schema = 'TEST_SCHEMA' AND table_name = 'TEST_TABLE' " + "AND grantor = CURRENT_ROLE() AND grantee <> CURRENT_ROLE()" + ) + assert 
executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert 'GRANT SELECT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" TO ROLE "ROLE1"' in sql_calls + assert 'GRANT SELECT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" TO ROLE "ROLE2"' in sql_calls + assert 'GRANT INSERT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" TO ROLE "ROLE3"' in sql_calls + assert ( + 'REVOKE SELECT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" FROM ROLE "OLD_ROLE"' + in sql_calls + ) + assert ( + 'REVOKE UPDATE ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" FROM ROLE "LEGACY_ROLE"' + in sql_calls + ) + + +def test_sync_grants_config_with_overlaps( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) + relation = normalize_identifiers( + exp.to_table("test_db.test_schema.test_table", dialect="snowflake"), dialect="snowflake" + ) + new_grants_config = { + "SELECT": ["ROLE shared", "ROLE new_role"], + "INSERT": ["ROLE shared", "ROLE writer"], + } + + current_grants = [ + ("SELECT", "ROLE shared"), + ("SELECT", "ROLE legacy"), + ("INSERT", "ROLE shared"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="snowflake") + expected_sql = ( + """SELECT privilege_type, grantee FROM TEST_DB.INFORMATION_SCHEMA.TABLE_PRIVILEGES """ + "WHERE table_catalog = 'TEST_DB' AND table_schema = 'TEST_SCHEMA' AND table_name = 'TEST_TABLE' " + "AND grantor = CURRENT_ROLE() AND grantee <> CURRENT_ROLE()" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 3 + + assert ( + 'GRANT SELECT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" TO ROLE "NEW_ROLE"' in sql_calls + ) + assert ( + 'GRANT INSERT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" TO ROLE "WRITER"' in sql_calls + ) + assert ( + 'REVOKE SELECT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" FROM ROLE "LEGACY"' + in sql_calls + ) + + +@pytest.mark.parametrize( + "table_type, expected_keyword", + [ + (DataObjectType.TABLE, "TABLE"), + (DataObjectType.VIEW, "VIEW"), + (DataObjectType.MATERIALIZED_VIEW, "MATERIALIZED VIEW"), + (DataObjectType.MANAGED_TABLE, "DYNAMIC TABLE"), + ], +) +def test_sync_grants_config_object_kind( + make_mocked_engine_adapter: t.Callable, + mocker: MockerFixture, + table_type: DataObjectType, + expected_keyword: str, +) -> None: + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) + relation = normalize_identifiers( + exp.to_table("test_db.test_schema.test_object", dialect="snowflake"), dialect="snowflake" + ) + + mocker.patch.object(adapter, "fetchall", return_value=[]) + + adapter.sync_grants_config(relation, {"SELECT": ["ROLE test"]}, table_type) + + sql_calls = to_sql_calls(adapter) + assert sql_calls == [ + f'GRANT SELECT ON {expected_keyword} "TEST_DB"."TEST_SCHEMA"."TEST_OBJECT" TO ROLE "TEST"' + ] + + +def test_sync_grants_config_quotes(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) + relation = normalize_identifiers( + exp.to_table('"test_db"."test_schema"."test_table"', dialect="snowflake"), + dialect="snowflake", + ) + new_grants_config = {"SELECT": ["ROLE role1", "ROLE role2"], "INSERT": ["ROLE role3"]} + + current_grants = [ + ("SELECT", "ROLE old_role"), + 
("UPDATE", "ROLE legacy_role"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="snowflake") + expected_sql = ( + """SELECT privilege_type, grantee FROM "test_db".INFORMATION_SCHEMA.TABLE_PRIVILEGES """ + "WHERE table_catalog = 'test_db' AND table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_ROLE() AND grantee <> CURRENT_ROLE()" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert 'GRANT SELECT ON TABLE "test_db"."test_schema"."test_table" TO ROLE "ROLE1"' in sql_calls + assert 'GRANT SELECT ON TABLE "test_db"."test_schema"."test_table" TO ROLE "ROLE2"' in sql_calls + assert 'GRANT INSERT ON TABLE "test_db"."test_schema"."test_table" TO ROLE "ROLE3"' in sql_calls + assert ( + 'REVOKE SELECT ON TABLE "test_db"."test_schema"."test_table" FROM ROLE "OLD_ROLE"' + in sql_calls + ) + assert ( + 'REVOKE UPDATE ON TABLE "test_db"."test_schema"."test_table" FROM ROLE "LEGACY_ROLE"' + in sql_calls + ) + + +def test_sync_grants_config_no_catalog_or_schema( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) + relation = normalize_identifiers( + exp.to_table('"TesT_Table"', dialect="snowflake"), dialect="snowflake" + ) + new_grants_config = {"SELECT": ["ROLE role1", "ROLE role2"], "INSERT": ["ROLE role3"]} + + current_grants = [ + ("SELECT", "ROLE old_role"), + ("UPDATE", "ROLE legacy_role"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + mocker.patch.object(adapter, "get_current_catalog", return_value="caTalog") + mocker.patch.object(adapter, "_get_current_schema", return_value="sChema") + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="snowflake") + expected_sql = ( + """SELECT privilege_type, grantee FROM "caTalog".INFORMATION_SCHEMA.TABLE_PRIVILEGES """ + "WHERE table_catalog = 'caTalog' AND table_schema = 'sChema' AND table_name = 'TesT_Table' " + "AND grantor = CURRENT_ROLE() AND grantee <> CURRENT_ROLE()" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert 'GRANT SELECT ON TABLE "TesT_Table" TO ROLE "ROLE1"' in sql_calls + assert 'GRANT SELECT ON TABLE "TesT_Table" TO ROLE "ROLE2"' in sql_calls + assert 'GRANT INSERT ON TABLE "TesT_Table" TO ROLE "ROLE3"' in sql_calls + assert 'REVOKE SELECT ON TABLE "TesT_Table" FROM ROLE "OLD_ROLE"' in sql_calls + assert 'REVOKE UPDATE ON TABLE "TesT_Table" FROM ROLE "LEGACY_ROLE"' in sql_calls + + def test_df_to_source_queries_use_schema( make_mocked_engine_adapter: t.Callable, mocker: MockerFixture ): diff --git a/tests/core/engine_adapter/test_spark.py b/tests/core/engine_adapter/test_spark.py index bc4e352bd7..d7c3127f05 100644 --- a/tests/core/engine_adapter/test_spark.py +++ b/tests/core/engine_adapter/test_spark.py @@ -224,7 +224,7 @@ def test_replace_query_self_ref_not_exists( lambda self: "spark_catalog", ) mocker.patch( - "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter.get_current_database", + "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter._get_current_schema", 
side_effect=lambda: "default", ) @@ -283,7 +283,7 @@ def test_replace_query_self_ref_exists( return_value="spark_catalog", ) mocker.patch( - "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter.get_current_database", + "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter._get_current_schema", return_value="default", ) diff --git a/tests/core/test_context.py b/tests/core/test_context.py index b7ce64eb4c..6270cec56a 100644 --- a/tests/core/test_context.py +++ b/tests/core/test_context.py @@ -3050,9 +3050,10 @@ def test_uppercase_gateway_external_models(tmp_path): # Check that the column types are properly loaded (not UNKNOWN) external_model = gateway_specific_models[0] column_types = {name: str(dtype) for name, dtype in external_model.columns_to_types.items()} - assert column_types == {"id": "INT", "name": "TEXT"}, ( - f"External model column types should not be UNKNOWN, got: {column_types}" - ) + assert column_types == { + "id": "INT", + "name": "TEXT", + }, f"External model column types should not be UNKNOWN, got: {column_types}" # Test that when using a different case for the gateway parameter, we get the same results context_mixed_case = Context( @@ -3177,3 +3178,55 @@ def test_lint_model_projections(tmp_path: Path): with pytest.raises(LinterError, match=config_err): prod_plan = context.plan(no_prompts=True, auto_apply=True) + + +def test_grants_through_plan_apply(sushi_context, mocker): + from sqlmesh.core.engine_adapter.duckdb import DuckDBEngineAdapter + from sqlmesh.core.model.meta import GrantsTargetLayer + + model = sushi_context.get_model("sushi.waiter_revenue_by_day") + + mocker.patch.object(DuckDBEngineAdapter, "SUPPORTS_GRANTS", True) + sync_grants_mock = mocker.patch.object(DuckDBEngineAdapter, "sync_grants_config") + + model_with_grants = model.copy( + update={ + "grants": {"select": ["analyst", "reporter"]}, + "grants_target_layer": GrantsTargetLayer.ALL, + } + ) + sushi_context.upsert_model(model_with_grants) + + sushi_context.plan("dev", no_prompts=True, auto_apply=True) + + # When planning for dev env w/ metadata only changes, + # only virtual layer is updated, so no physical grants are applied + assert sync_grants_mock.call_count == 1 + assert all( + call[0][1] == {"select": ["analyst", "reporter"]} + for call in sync_grants_mock.call_args_list + ) + + sync_grants_mock.reset_mock() + + new_grants = ({"select": ["analyst", "reporter", "manager"], "insert": ["etl_user"]},) + model_updated = model_with_grants.copy( + update={ + "query": parse_one(model.query.sql() + " LIMIT 1000"), + "grants": new_grants, + # force model update, hence new physical table creation + "stamp": "update model and grants", + } + ) + sushi_context.upsert_model(model_updated) + sushi_context.plan("dev", no_prompts=True, auto_apply=True) + + # Applies grants 2 times: 1 x physical, 1 x virtual + assert sync_grants_mock.call_count == 2 + assert all(call[0][1] == new_grants for call in sync_grants_mock.call_args_list) + + sync_grants_mock.reset_mock() + + # plan for prod + sushi_context.plan(no_prompts=True, auto_apply=True) + assert sync_grants_mock.call_count == 2 diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 726ac52b66..f1a9eeb0b9 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -1,6 +1,7 @@ # ruff: noqa: F811 import json import typing as t +import re from datetime import date, datetime from pathlib import Path from unittest.mock import patch, PropertyMock @@ -14,7 +15,7 @@ from sqlglot.schema import MappingSchema from sqlmesh.cli.project_init import 
init_example_project, ProjectTemplate from sqlmesh.core.environment import EnvironmentNamingInfo -from sqlmesh.core.model.kind import TimeColumn, ModelKindName +from sqlmesh.core.model.kind import TimeColumn, ModelKindName, SeedKind from sqlmesh import CustomMaterialization, CustomKind from pydantic import model_validator, ValidationError @@ -36,6 +37,7 @@ from sqlmesh.core.dialect import parse from sqlmesh.core.engine_adapter.base import MERGE_SOURCE_ALIAS, MERGE_TARGET_ALIAS from sqlmesh.core.engine_adapter.duckdb import DuckDBEngineAdapter +from sqlmesh.core.engine_adapter.shared import DataObjectType from sqlmesh.core.macros import MacroEvaluator, macro from sqlmesh.core.model import ( CustomKind, @@ -51,6 +53,8 @@ TimeColumn, ExternalKind, ViewKind, + EmbeddedKind, + SCDType2ByTimeKind, create_external_model, create_seed_model, create_sql_model, @@ -59,7 +63,7 @@ model, ) from sqlmesh.core.model.common import parse_expression -from sqlmesh.core.model.kind import ModelKindName, _model_kind_validator +from sqlmesh.core.model.kind import _ModelKind, ModelKindName, _model_kind_validator from sqlmesh.core.model.seed import CsvSettings from sqlmesh.core.node import IntervalUnit, _Node, DbtNodeInfo from sqlmesh.core.signal import signal @@ -1922,7 +1926,8 @@ def test_render_definition_with_defaults(): kind VIEW ( materialized FALSE ), - virtual_environment_mode 'full' + virtual_environment_mode 'full', + grants_target_layer 'virtual' ); {query} @@ -1935,6 +1940,90 @@ def test_render_definition_with_defaults(): ) == d.format_model_expressions(expected_expressions) +def test_render_definition_with_grants(): + from sqlmesh.core.model.meta import GrantsTargetLayer + + expressions = d.parse( + """ + MODEL ( + name test.grants_model, + kind FULL, + grants ( + 'select' = ['user1', 'user2'], + 'insert' = ['admin'], + 'roles/bigquery.dataViewer' = ['user:data_eng@mycompany.com'] + ), + grants_target_layer all, + ); + SELECT 1 as id + """ + ) + model = load_sql_based_model(expressions) + assert model.grants_target_layer == GrantsTargetLayer.ALL + assert model.grants == { + "select": ["user1", "user2"], + "insert": ["admin"], + "roles/bigquery.dataViewer": ["user:data_eng@mycompany.com"], + } + + rendered = model.render_definition(include_defaults=True) + rendered_text = d.format_model_expressions(rendered) + assert "grants_target_layer 'all'" in rendered_text + assert re.search( + r"grants\s*\(" + r"\s*'select'\s*=\s*ARRAY\('user1',\s*'user2'\)," + r"\s*'insert'\s*=\s*ARRAY\('admin'\)," + r"\s*'roles/bigquery.dataViewer'\s*=\s*ARRAY\('user:data_eng@mycompany.com'\)" + r"\s*\)", + rendered_text, + ) + + model_with_grants = create_sql_model( + name="test_grants_programmatic", + query=d.parse_one("SELECT 1 as id"), + grants={"select": ["user1", "user2"], "insert": ["admin"]}, + grants_target_layer=GrantsTargetLayer.ALL, + ) + assert model_with_grants.grants == {"select": ["user1", "user2"], "insert": ["admin"]} + assert model_with_grants.grants_target_layer == GrantsTargetLayer.ALL + rendered_text = d.format_model_expressions( + model_with_grants.render_definition(include_defaults=True) + ) + assert "grants_target_layer 'all'" in rendered_text + assert re.search( + r"grants\s*\(" + r"\s*'select'\s*=\s*ARRAY\('user1',\s*'user2'\)," + r"\s*'insert'\s*=\s*ARRAY\('admin'\)" + r"\s*\)", + rendered_text, + ) + + virtual_expressions = d.parse( + """ + MODEL ( + name test.virtual_grants_model, + kind FULL, + grants_target_layer virtual + ); + SELECT 1 as id + """ + ) + virtual_model = 
load_sql_based_model(virtual_expressions) + assert virtual_model.grants_target_layer == GrantsTargetLayer.VIRTUAL + + default_expressions = d.parse( + """ + MODEL ( + name test.default_grants_model, + kind FULL + ); + SELECT 1 as id + """ + ) + default_model = load_sql_based_model(default_expressions) + assert default_model.grants_target_layer == GrantsTargetLayer.VIRTUAL # default value + + def test_render_definition_partitioned_by(): # no parenthesis in definition, no parenthesis when rendered model = load_sql_based_model( @@ -11717,3 +11806,254 @@ def my_macro(evaluator): model = context.get_model("test_model", raise_if_missing=True) assert model.render_query_or_raise().sql() == 'SELECT 3 AS "c"' + + +def test_grants(): + expressions = d.parse(""" + MODEL ( + name test.table, + kind FULL, + grants ( + 'select' = ['user1', 123, admin_role, 'user2'], + 'insert' = 'admin', + 'roles/bigquery.dataViewer' = ["group:data_eng@company.com", 'user:someone@company.com'], + 'update' = 'admin' + ) + ); + SELECT 1 as id + """) + model = load_sql_based_model(expressions) + assert model.grants == { + "select": ["user1", "123", "admin_role", "user2"], + "insert": ["admin"], + "roles/bigquery.dataViewer": ["group:data_eng@company.com", "user:someone@company.com"], + "update": ["admin"], + } + + model = create_sql_model( + "db.table", + parse_one("SELECT 1 AS id"), + kind="FULL", + grants={ + "select": ["user1", "user2"], + "insert": ["admin"], + "roles/bigquery.dataViewer": "user:data_eng@company.com", + }, + ) + assert model.grants == { + "select": ["user1", "user2"], + "insert": ["admin"], + "roles/bigquery.dataViewer": ["user:data_eng@company.com"], + } + + +@pytest.mark.parametrize( + "kind", + [ + "FULL", + "VIEW", + SeedKind(path="test.csv"), + IncrementalByTimeRangeKind(time_column="ds"), + IncrementalByUniqueKeyKind(unique_key="id"), + ], +) +def test_grants_valid_model_kinds(kind: t.Union[str, _ModelKind]): + model = create_sql_model( + "db.table", + parse_one("SELECT 1 AS id"), + kind=kind, + grants={"select": ["user1", "user2"], "insert": ["admin_user"]}, + ) + assert model.grants == {"select": ["user1", "user2"], "insert": ["admin_user"]} + + +@pytest.mark.parametrize( + "kind", + [ + "EXTERNAL", + "EMBEDDED", + ], +) +def test_grants_invalid_model_kind_errors(kind: str): + with pytest.raises(ValidationError, match=rf".*grants cannot be set for {kind}.*"): + create_sql_model( + "db.table", + parse_one("SELECT 1 AS id"), + kind=kind, + grants={"select": ["user1"], "insert": ["admin_user"]}, + ) + + +def test_model_kind_supports_grants(): + assert FullKind().supports_grants is True + assert ViewKind().supports_grants is True + assert IncrementalByTimeRangeKind(time_column="ds").supports_grants is True + assert IncrementalByUniqueKeyKind(unique_key=["id"]).supports_grants is True + assert SCDType2ByTimeKind(unique_key=["id"]).supports_grants is True + + assert EmbeddedKind().supports_grants is False + assert ExternalKind().supports_grants is False + + +def test_grants_validation_no_grants(): + model = create_sql_model("db.table", parse_one("SELECT 1 AS id"), kind="FULL") + assert model.grants is None + + +def test_grants_validation_empty_grantees(): + model = create_sql_model( + "db.table", parse_one("SELECT 1 AS id"), kind="FULL", grants={"select": []} + ) + assert model.grants == {"select": []} + + +def test_grants_single_value_conversions(): + expressions = d.parse(f""" + MODEL ( + name test.nested_arrays, + kind FULL, + grants ( + 'select' = "user1", update = user2 + ) + ); + SELECT 1 as id 
+ """) + model = load_sql_based_model(expressions) + assert model.grants == {"select": ["user1"], "update": ["user2"]} + + model = create_sql_model( + "db.table", + parse_one("SELECT 1 AS id"), + kind="FULL", + grants={"select": "user1", "insert": 123}, + ) + assert model.grants == {"select": ["user1"], "insert": ["123"]} + + +@pytest.mark.parametrize( + "grantees", + [ + "('user1', ('user2', 'user3'), 'user4')", + "('user1', ['user2', 'user3'], user4)", + "['user1', ['user2', user3], 'user4']", + "[user1, ('user2', \"user3\"), 'user4']", + ], +) +def test_grants_array_flattening(grantees: str): + expressions = d.parse(f""" + MODEL ( + name test.nested_arrays, + kind FULL, + grants ( + 'select' = {grantees} + ) + ); + SELECT 1 as id + """) + model = load_sql_based_model(expressions) + assert model.grants == {"select": ["user1", "user2", "user3", "user4"]} + + +def test_grants_macro_var_resolved(): + expressions = d.parse(""" + MODEL ( + name test.macro_grants, + kind FULL, + grants ( + 'select' = @VAR('readers'), + 'insert' = @VAR('writers') + ) + ); + SELECT 1 as id + """) + model = load_sql_based_model( + expressions, variables={"readers": ["user1", "user2"], "writers": "admin"} + ) + assert model.grants == { + "select": ["user1", "user2"], + "insert": ["admin"], + } + + +def test_grants_macro_var_in_array_flattening(): + expressions = d.parse(""" + MODEL ( + name test.macro_in_array, + kind FULL, + grants ( + 'select' = ['user1', @VAR('admins'), 'user3'] + ) + ); + SELECT 1 as id + """) + + model = load_sql_based_model(expressions, variables={"admins": ["admin1", "admin2"]}) + assert model.grants == {"select": ["user1", "admin1", "admin2", "user3"]} + + model2 = load_sql_based_model(expressions, variables={"admins": "super_admin"}) + assert model2.grants == {"select": ["user1", "super_admin", "user3"]} + + +def test_grants_dynamic_permission_names(): + expressions = d.parse(""" + MODEL ( + name test.dynamic_keys, + kind FULL, + grants ( + @VAR('read_perm') = ['user1', 'user2'], + @VAR('write_perm') = ['admin'] + ) + ); + SELECT 1 as id + """) + model = load_sql_based_model( + expressions, variables={"read_perm": "select", "write_perm": "insert"} + ) + assert model.grants == {"select": ["user1", "user2"], "insert": ["admin"]} + + +def test_grants_unresolved_macro_errors(): + expressions1 = d.parse(""" + MODEL (name test.bad1, kind FULL, grants ('select' = @VAR('undefined'))); + SELECT 1 as id + """) + with pytest.raises(ConfigError, match=r"Invalid grants configuration for 'select': NULL value"): + load_sql_based_model(expressions1) + + expressions2 = d.parse(""" + MODEL (name test.bad2, kind FULL, grants (@VAR('undefined') = ['user'])); + SELECT 1 as id + """) + with pytest.raises(ConfigError, match=r"Invalid grants configuration.*NULL value"): + load_sql_based_model(expressions2) + + expressions3 = d.parse(""" + MODEL (name test.bad3, kind FULL, grants ('select' = ['user', @VAR('undefined')])); + SELECT 1 as id + """) + with pytest.raises(ConfigError, match=r"Invalid grants configuration for 'select': NULL value"): + load_sql_based_model(expressions3) + + +def test_grants_empty_values(): + model1 = create_sql_model( + "db.table", parse_one("SELECT 1 AS id"), kind="FULL", grants={"select": []} + ) + assert model1.grants == {"select": []} + + model2 = create_sql_model("db.table", parse_one("SELECT 1 AS id"), kind="FULL") + assert model2.grants is None + + +@pytest.mark.parametrize( + "kind, expected", + [ + ("VIEW", DataObjectType.VIEW), + ("FULL", DataObjectType.TABLE), + ("MANAGED", 
DataObjectType.MANAGED_TABLE), + (ViewKind(materialized=True), DataObjectType.MATERIALIZED_VIEW), + ], +) +def test_grants_table_type(kind: t.Union[str, _ModelKind], expected: DataObjectType): + model = create_sql_model("test_table", parse_one("SELECT 1 as id"), kind=kind) + assert model.grants_table_type == expected diff --git a/tests/core/test_snapshot.py b/tests/core/test_snapshot.py index c769991b86..1acc6cc265 100644 --- a/tests/core/test_snapshot.py +++ b/tests/core/test_snapshot.py @@ -168,6 +168,7 @@ def test_json(snapshot: Snapshot): "enabled": True, "extract_dependencies_from_query": True, "virtual_environment_mode": "full", + "grants_target_layer": "virtual", }, "name": '"name"', "parents": [{"name": '"parent"."tbl"', "identifier": snapshot.parents[0].identifier}], @@ -181,6 +182,36 @@ def test_json(snapshot: Snapshot): } +def test_json_with_grants(make_snapshot: t.Callable): + from sqlmesh.core.model.meta import GrantsTargetLayer + + model = SqlModel( + name="name", + kind=dict(time_column="ds", batch_size=30, name=ModelKindName.INCREMENTAL_BY_TIME_RANGE), + owner="owner", + dialect="spark", + cron="1 0 * * *", + start="2020-01-01", + query=parse_one("SELECT @EACH([1, 2], x -> x), ds FROM parent.tbl"), + grants={"SELECT": ["role1", "role2"], "INSERT": ["role3"]}, + grants_target_layer=GrantsTargetLayer.VIRTUAL, + ) + snapshot = make_snapshot(model) + + json_str = snapshot.json() + json_data = json.loads(json_str) + assert ( + json_data["node"]["grants"] + == "('SELECT' = ARRAY('role1', 'role2'), 'INSERT' = ARRAY('role3'))" + ) + assert json_data["node"]["grants_target_layer"] == "virtual" + + reparsed_snapshot = Snapshot.model_validate_json(json_str) + assert isinstance(reparsed_snapshot.node, SqlModel) + assert reparsed_snapshot.node.grants == {"SELECT": ["role1", "role2"], "INSERT": ["role3"]} + assert reparsed_snapshot.node.grants_target_layer == GrantsTargetLayer.VIRTUAL + + def test_json_custom_materialization(make_snapshot: t.Callable): model = SqlModel( name="name", @@ -954,7 +985,7 @@ def test_fingerprint(model: Model, parent_model: Model): original_fingerprint = SnapshotFingerprint( data_hash="2406542604", - metadata_hash="3341445192", + metadata_hash="1056339358", ) assert fingerprint == original_fingerprint @@ -1014,8 +1045,8 @@ def test_fingerprint_seed_model(): ) expected_fingerprint = SnapshotFingerprint( - data_hash="1586624913", - metadata_hash="2315134974", + data_hash="2112858704", + metadata_hash="2674364560", ) model = load_sql_based_model(expressions, path=Path("./examples/sushi/models/test_model.sql")) @@ -1054,7 +1085,7 @@ def test_fingerprint_jinja_macros(model: Model): ) original_fingerprint = SnapshotFingerprint( data_hash="93332825", - metadata_hash="3341445192", + metadata_hash="1056339358", ) fingerprint = fingerprint_from_node(model, nodes={}) @@ -1131,6 +1162,40 @@ def test_fingerprint_virtual_properties(model: Model, parent_model: Model): assert updated_fingerprint.data_hash == fingerprint.data_hash +def test_fingerprint_grants(model: Model, parent_model: Model): + from sqlmesh.core.model.meta import GrantsTargetLayer + + original_model = deepcopy(model) + fingerprint = fingerprint_from_node(model, nodes={}) + + updated_model = SqlModel( + **original_model.dict(), + grants={"SELECT": ["role1", "role2"]}, + ) + updated_fingerprint = fingerprint_from_node(updated_model, nodes={}) + + assert updated_fingerprint != fingerprint + assert updated_fingerprint.metadata_hash != fingerprint.metadata_hash + assert updated_fingerprint.data_hash == 
fingerprint.data_hash + + different_grants_model = SqlModel( + **original_model.dict(), + grants={"SELECT": ["role3"], "INSERT": ["role4"]}, + ) + different_grants_fingerprint = fingerprint_from_node(different_grants_model, nodes={}) + + assert different_grants_fingerprint.metadata_hash != updated_fingerprint.metadata_hash + assert different_grants_fingerprint.metadata_hash != fingerprint.metadata_hash + + target_layer_model = SqlModel( + **{**original_model.dict(), "grants_target_layer": GrantsTargetLayer.PHYSICAL}, + grants={"SELECT": ["role1", "role2"]}, + ) + target_layer_fingerprint = fingerprint_from_node(target_layer_model, nodes={}) + + assert target_layer_fingerprint.metadata_hash != updated_fingerprint.metadata_hash + + def test_tableinfo_equality(): snapshot_a = SnapshotTableInfo( name="test_schema.a", diff --git a/tests/core/test_snapshot_evaluator.py b/tests/core/test_snapshot_evaluator.py index 19685e81c3..68061544a8 100644 --- a/tests/core/test_snapshot_evaluator.py +++ b/tests/core/test_snapshot_evaluator.py @@ -41,8 +41,10 @@ load_sql_based_model, ExternalModel, model, + create_sql_model, ) from sqlmesh.core.model.kind import OnDestructiveChange, ExternalKind, OnAdditiveChange +from sqlmesh.core.model.meta import GrantsTargetLayer from sqlmesh.core.node import IntervalUnit from sqlmesh.core.snapshot import ( DeployabilityIndex, @@ -55,7 +57,19 @@ SnapshotTableCleanupTask, ) from sqlmesh.core.snapshot.definition import to_view_mapping -from sqlmesh.core.snapshot.evaluator import CustomMaterialization, SnapshotCreationFailedError +from sqlmesh.core.snapshot.evaluator import ( + CustomMaterialization, + EngineManagedStrategy, + FullRefreshStrategy, + IncrementalByPartitionStrategy, + IncrementalByTimeRangeStrategy, + IncrementalByUniqueKeyStrategy, + IncrementalUnmanagedStrategy, + MaterializableStrategy, + SCDType2Strategy, + SnapshotCreationFailedError, + ViewStrategy, +) from sqlmesh.utils.concurrency import NodeExecutionFailedError from sqlmesh.utils.date import to_timestamp from sqlmesh.utils.errors import ( @@ -908,7 +922,7 @@ def test_pre_hook_forward_only_clone( time_column ds ) ); - + {pre_statement}; SELECT a::int, ds::string FROM tbl; @@ -4858,3 +4872,524 @@ def mutate_view_properties(*args, **kwargs): # Both calls should have view_properties with security invoker assert props == ["'SECURITY INVOKER'", "'SECURITY INVOKER'"] + + +def _create_grants_test_model( + grants=None, kind="FULL", grants_target_layer=None, virtual_environment_mode=None +): + if kind == "SEED": + from sqlmesh.core.model.definition import create_seed_model + from sqlmesh.core.model.kind import SeedKind + import tempfile + import os + + # Create a temporary CSV file for the test + temp_csv = tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) + temp_csv.write("id,name\n1,test\n2,test2\n") + temp_csv.flush() + temp_csv.close() + + seed_kind_config = {"name": "SEED", "path": temp_csv.name} + seed_kind = SeedKind(**seed_kind_config) + + kwargs = {} + if grants is not None: + kwargs["grants"] = grants + if grants_target_layer is not None: + kwargs["grants_target_layer"] = grants_target_layer + + model = create_seed_model("test_model", seed_kind, **kwargs) + + # Clean up the temporary file + os.unlink(temp_csv.name) + + return model + + # Handle regular SQL models + kwargs = { + "kind": kind, + } + if grants is not None: + kwargs["grants"] = grants + if grants_target_layer is not None: + kwargs["grants_target_layer"] = grants_target_layer + if virtual_environment_mode is not None: + 
kwargs["virtual_environment_mode"] = virtual_environment_mode + + # Add column annotations for non-SEED models to ensure table creation + if kind != "SEED": + kwargs["columns"] = { + "id": "INT", + "ds": "DATE", + "updated_at": "TIMESTAMP", + } + + # Add required fields for specific model kinds + if kind == "INCREMENTAL_BY_TIME_RANGE": + kwargs["kind"] = {"name": "INCREMENTAL_BY_TIME_RANGE", "time_column": "ds"} + elif kind == "INCREMENTAL_BY_PARTITION": + kwargs["kind"] = {"name": "INCREMENTAL_BY_PARTITION"} + kwargs["partitioned_by"] = ["ds"] # This goes on the model, not the kind + elif kind == "INCREMENTAL_BY_UNIQUE_KEY": + kwargs["kind"] = {"name": "INCREMENTAL_BY_UNIQUE_KEY", "unique_key": ["id"]} + elif kind == "INCREMENTAL_UNMANAGED": + kwargs["kind"] = {"name": "INCREMENTAL_UNMANAGED"} + elif kind == "SCD_TYPE_2": + kwargs["kind"] = { + "name": "SCD_TYPE_2", + "unique_key": ["id"], + "updated_at_name": "updated_at", + } + + return create_sql_model( + "test_model", + parse_one("SELECT 1 as id, CURRENT_DATE as ds, CURRENT_TIMESTAMP as updated_at"), + **kwargs, + ) + + +@pytest.mark.parametrize( + "target_layer,apply_layer,expected_call_count", + [ + (GrantsTargetLayer.ALL, GrantsTargetLayer.PHYSICAL, 1), + (GrantsTargetLayer.ALL, GrantsTargetLayer.VIRTUAL, 1), + (GrantsTargetLayer.PHYSICAL, GrantsTargetLayer.PHYSICAL, 1), + (GrantsTargetLayer.PHYSICAL, GrantsTargetLayer.VIRTUAL, 0), + (GrantsTargetLayer.VIRTUAL, GrantsTargetLayer.PHYSICAL, 0), + (GrantsTargetLayer.VIRTUAL, GrantsTargetLayer.VIRTUAL, 1), + ], +) +def test_apply_grants_target_layer( + target_layer: GrantsTargetLayer, + apply_layer: GrantsTargetLayer, + expected_call_count: int, + adapter_mock: Mock, + mocker: MockerFixture, +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + strategy = ViewStrategy(adapter_mock) + + model = _create_grants_test_model( + grants={"select": ["user1"]}, grants_target_layer=target_layer + ) + + strategy._apply_grants(model, "test_table", apply_layer) + + if expected_call_count > 0: + assert sync_grants_mock.call_count == expected_call_count + else: + sync_grants_mock.assert_not_called() + + +@pytest.mark.parametrize( + "model_kind_name", + [ + "FULL", + "INCREMENTAL_BY_TIME_RANGE", + "SEED", + "MANAGED", + "SCD_TYPE_2", + "VIEW", + ], +) +def test_grants_create_model_kind( + model_kind_name: str, + adapter_mock: Mock, + mocker: MockerFixture, + make_snapshot: t.Callable[..., Snapshot], +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + + grants = {"select": ["user1"]} + model = _create_grants_test_model( + grants=grants, kind=model_kind_name, grants_target_layer=GrantsTargetLayer.ALL + ) + snapshot = make_snapshot(model) + + evaluator = SnapshotEvaluator(adapter_mock) + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + evaluator.create([snapshot], {}) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == grants + + +@pytest.mark.parametrize( + "target_layer", + [ + GrantsTargetLayer.PHYSICAL, + GrantsTargetLayer.VIRTUAL, + GrantsTargetLayer.ALL, + ], +) +def test_grants_target_layer( + target_layer: GrantsTargetLayer, + adapter_mock: Mock, + mocker: MockerFixture, + make_snapshot: t.Callable[..., Snapshot], +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + evaluator = SnapshotEvaluator(adapter_mock) + + grants 
= {"select": ["user1"]} + model = create_sql_model( + "test_schema.test_model", + parse_one("SELECT 1 as id"), + kind="FULL", + grants=grants, + grants_target_layer=target_layer, + ) + + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + evaluator.create([snapshot], {}) + if target_layer == GrantsTargetLayer.VIRTUAL: + assert sync_grants_mock.call_count == 0 + else: + assert sync_grants_mock.call_count == 1 + assert sync_grants_mock.call_args[0][1] == grants + sync_grants_mock.reset_mock() + evaluator.promote([snapshot], EnvironmentNamingInfo(name="prod")) + if target_layer == GrantsTargetLayer.VIRTUAL: + assert sync_grants_mock.call_count == 1 + elif target_layer == GrantsTargetLayer.PHYSICAL: + # Physical layer: no grants applied during promotion (already applied during create) + assert sync_grants_mock.call_count == 0 + else: # target_layer == GrantsTargetLayer.ALL + # All layers: only virtual grants applied during promotion (physical already done in create) + assert sync_grants_mock.call_count == 1 + + +def test_grants_update( + adapter_mock: Mock, mocker: MockerFixture, make_snapshot: t.Callable[..., Snapshot] +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + + evaluator = SnapshotEvaluator(adapter_mock) + + model = create_sql_model( + "test_schema.test_model", + parse_one("SELECT 1 as id"), + kind="FULL", + grants={"select": ["user1"]}, + grants_target_layer=GrantsTargetLayer.ALL, + ) + + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + evaluator.create([snapshot], {}) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == {"select": ["user1"]} + + # Update model query AND change grants + updated_model_dict = model.dict() + updated_model_dict["query"] = parse_one("SELECT 1 as id, 2 as value") + updated_model_dict["grants"] = {"select": ["user2", "user3"], "insert": ["admin"]} + updated_model = SqlModel.parse_obj(updated_model_dict) + + new_snapshot = make_snapshot(updated_model) + new_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + sync_grants_mock.reset_mock() + evaluator.create([new_snapshot], {}) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == {"select": ["user2", "user3"], "insert": ["admin"]} + + # Update model query AND remove grants + updated_model_dict = model.dict() + updated_model_dict["query"] = parse_one("SELECT 1 as id, 'updated' as status") + updated_model_dict["grants"] = {} + updated_model = SqlModel.parse_obj(updated_model_dict) + + new_snapshot = make_snapshot(updated_model) + new_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + sync_grants_mock.reset_mock() + evaluator.create([new_snapshot], {}) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == {} + + +def test_grants_create_and_evaluate( + adapter_mock: Mock, mocker: MockerFixture, make_snapshot: t.Callable[..., Snapshot] +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + + evaluator = SnapshotEvaluator(adapter_mock) + + model = load_sql_based_model( + parse( # type: ignore + """ + MODEL ( + name test_schema.test_model, + kind INCREMENTAL_BY_TIME_RANGE (time_column ds), + grants ( + 'select' = ['reader1', 'reader2'], + 'insert' = ['writer'] + ), + grants_target_layer 'all' + ); + SELECT ds::DATE, value::INT FROM source WHERE ds BETWEEN @start_ds AND @end_ds; + 
""" + ) + ) + + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + evaluator.create([snapshot], {}) + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == { + "select": ["reader1", "reader2"], + "insert": ["writer"], + } + + sync_grants_mock.reset_mock() + evaluator.evaluate( + snapshot, start="2020-01-01", end="2020-01-02", execution_time="2020-01-02", snapshots={} + ) + # Evaluate should not reapply grants + sync_grants_mock.assert_not_called() + + +@pytest.mark.parametrize( + "strategy_class", + [ + EngineManagedStrategy, + FullRefreshStrategy, + IncrementalByTimeRangeStrategy, + IncrementalByPartitionStrategy, + IncrementalUnmanagedStrategy, + IncrementalByUniqueKeyStrategy, + SCDType2Strategy, + # SeedStrategy excluded because seeds do not support migrations + ], +) +def test_grants_materializable_strategy_migrate( + strategy_class: t.Type[MaterializableStrategy], + adapter_mock: Mock, + mocker: MockerFixture, + make_snapshot: t.Callable[..., Snapshot], +): + adapter_mock.SUPPORTS_GRANTS = True + adapter_mock.get_alter_operations.return_value = [] + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + strategy = strategy_class(adapter_mock) + grants = {"select": ["user1"]} + model = _create_grants_test_model(grants=grants, grants_target_layer=GrantsTargetLayer.ALL) + snapshot = make_snapshot(model) + + strategy.migrate( + "target_table", + "source_table", + snapshot, + ignore_destructive=False, + ignore_additive=False, + allow_destructive_snapshots=set(), + allow_additive_snapshots=set(), + ) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == grants + + +def test_grants_clone_snapshot_in_dev( + adapter_mock: Mock, mocker: MockerFixture, make_snapshot: t.Callable[..., Snapshot] +): + adapter_mock.SUPPORTS_CLONING = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + + evaluator = SnapshotEvaluator(adapter_mock) + grants = {"select": ["user1", "user2"]} + model = _create_grants_test_model(grants=grants, grants_target_layer=GrantsTargetLayer.ALL) + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + evaluator._clone_snapshot_in_dev( + snapshot, {}, DeployabilityIndex.all_deployable(), {}, {}, set(), set() + ) + + sync_grants_mock.assert_called_once() + assert ( + sync_grants_mock.call_args[0][0].sql() + == f"sqlmesh__default.test_model__{snapshot.version}__dev" + ) + assert sync_grants_mock.call_args[0][1] == grants + + +@pytest.mark.parametrize( + "model_kind_name", + [ + "INCREMENTAL_BY_TIME_RANGE", + "SEED", + ], +) +def test_grants_evaluator_insert_without_replace_query_for_model( + model_kind_name: str, + adapter_mock: Mock, + mocker: MockerFixture, + make_snapshot: t.Callable[..., Snapshot], +): + adapter_mock.SUPPORTS_GRANTS = True + adapter_mock.table_exists.return_value = False # Table doesn't exist + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + + evaluator = SnapshotEvaluator(adapter_mock) + + grants = {"select": ["reader1", "reader2"]} + model = _create_grants_test_model( + grants=grants, kind=model_kind_name, grants_target_layer=GrantsTargetLayer.ALL + ) + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + evaluator.evaluate( + snapshot, + start="2023-01-01", + end="2023-01-01", + execution_time="2023-01-01", + snapshots={}, + ) + + # Grants are applied during the table creation phase, not during 
insert
+    sync_grants_mock.assert_called_once()
+    assert sync_grants_mock.call_args[0][1] == grants
+
+    sync_grants_mock.reset_mock()
+    adapter_mock.table_exists.return_value = True
+    snapshot.add_interval("2023-01-01", "2023-01-01")
+    evaluator.evaluate(
+        snapshot,
+        start="2023-01-02",  # Different date from existing interval
+        end="2023-01-02",
+        execution_time="2023-01-02",
+        snapshots={},
+    )
+
+    # Should not apply grants since it's not the first insert
+    sync_grants_mock.assert_not_called()
+
+
+@pytest.mark.parametrize(
+    "model_kind_name",
+    [
+        "INCREMENTAL_BY_PARTITION",
+        "INCREMENTAL_BY_UNIQUE_KEY",
+        "INCREMENTAL_UNMANAGED",
+        "FULL",
+        "SCD_TYPE_2",
+    ],
+)
+def test_grants_evaluator_insert_with_replace_query_for_model(
+    model_kind_name: str,
+    adapter_mock: Mock,
+    mocker: MockerFixture,
+    make_snapshot: t.Callable[..., Snapshot],
+):
+    adapter_mock.SUPPORTS_GRANTS = True
+    sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config")
+    adapter_mock.table_exists.return_value = False  # Table doesn't exist
+    adapter_mock.columns.return_value = {
+        "id": exp.DataType.build("int"),
+        "ds": exp.DataType.build("date"),
+    }
+
+    evaluator = SnapshotEvaluator(adapter_mock)
+
+    grants = {"select": ["user1"]}
+    model = _create_grants_test_model(
+        grants=grants, kind=model_kind_name, grants_target_layer=GrantsTargetLayer.ALL
+    )
+    snapshot = make_snapshot(model)
+    snapshot.categorize_as(SnapshotChangeCategory.BREAKING)
+
+    # Now evaluate the snapshot (this should apply grants during first insert)
+    evaluator.evaluate(
+        snapshot,
+        start="2023-01-01",
+        end="2023-01-01",
+        execution_time="2023-01-01",
+        snapshots={},
+    )
+
+    # Should be called twice during evaluate: once when creating the table,
+    # once during the first insert with _replace_query_for_model()
+    assert sync_grants_mock.call_count == 2
+    assert sync_grants_mock.call_args[0][1] == grants
+
+    sync_grants_mock.reset_mock()
+    adapter_mock.table_exists.return_value = True
+    snapshot.add_interval("2023-01-01", "2023-01-01")
+    evaluator.evaluate(
+        snapshot,
+        start="2023-01-02",  # Different date from existing interval
+        end="2023-01-02",
+        execution_time="2023-01-02",
+        snapshots={},
+    )
+
+    if model_kind_name in ("FULL", "SCD_TYPE_2"):
+        # Full refresh and SCD_TYPE_2 always recreate the table, so grants are always applied
+        sync_grants_mock.assert_called_once()
+        assert sync_grants_mock.call_args[0][1] == grants
+    else:
+        # Should not apply grants since it's not the first insert
+        sync_grants_mock.assert_not_called()
+
+
+@pytest.mark.parametrize(
+    "model_grants_target_layer",
+    [
+        GrantsTargetLayer.ALL,
+        GrantsTargetLayer.VIRTUAL,
+        GrantsTargetLayer.PHYSICAL,
+    ],
+)
+def test_grants_in_production_with_dev_only_vde(
+    adapter_mock: Mock,
+    mocker: MockerFixture,
+    make_snapshot: t.Callable[..., Snapshot],
+    model_grants_target_layer: GrantsTargetLayer,
+):
+    adapter_mock.SUPPORTS_GRANTS = True
+    sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config")
+
+    from sqlmesh.core.model.meta import VirtualEnvironmentMode, GrantsTargetLayer
+    from sqlmesh.core.snapshot.definition import DeployabilityIndex
+
+    model_virtual_grants = _create_grants_test_model(
+        grants={"select": ["user1"], "insert": ["role1"]},
+        grants_target_layer=model_grants_target_layer,
+        virtual_environment_mode=VirtualEnvironmentMode.DEV_ONLY,
+    )
+
+    snapshot = make_snapshot(model_virtual_grants)
+    snapshot.categorize_as(SnapshotChangeCategory.BREAKING)
+    evaluator = SnapshotEvaluator(adapter_mock)
+    # create will apply grants to
physical layer tables + deployability_index = DeployabilityIndex.all_deployable() + evaluator.create([snapshot], {}, deployability_index=deployability_index) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == {"select": ["user1"], "insert": ["role1"]} + + # Non-deployable (dev) env + sync_grants_mock.reset_mock() + deployability_index = DeployabilityIndex.none_deployable() + evaluator.create([snapshot], {}, deployability_index=deployability_index) + if model_grants_target_layer == GrantsTargetLayer.VIRTUAL: + sync_grants_mock.assert_not_called() + else: + # Should still apply grants to physical table when target layer is ALL or PHYSICAL + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == {"select": ["user1"], "insert": ["role1"]} diff --git a/tests/dbt/test_model.py b/tests/dbt/test_model.py index e29c6768bf..eb16a4b4b1 100644 --- a/tests/dbt/test_model.py +++ b/tests/dbt/test_model.py @@ -9,10 +9,12 @@ from sqlmesh.core.model import TimeColumn, IncrementalByTimeRangeKind from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange from sqlmesh.core.state_sync.db.snapshot import _snapshot_to_json +from sqlmesh.core.config.common import VirtualEnvironmentMode +from sqlmesh.core.model.meta import GrantsTargetLayer from sqlmesh.dbt.common import Dependencies from sqlmesh.dbt.context import DbtContext from sqlmesh.dbt.model import ModelConfig -from sqlmesh.dbt.target import PostgresConfig +from sqlmesh.dbt.target import BigQueryConfig, DuckDbConfig, PostgresConfig from sqlmesh.dbt.test import TestConfig from sqlmesh.utils.yaml import YAML from sqlmesh.utils.date import to_ds @@ -853,3 +855,176 @@ def test_load_custom_materialisations(sushi_test_dbt_context: Context) -> None: context.load() assert context.get_model("sushi.custom_incremental_model") assert context.get_model("sushi.custom_incremental_with_filter") + + +def test_model_grants_to_sqlmesh_grants_config() -> None: + grants_config = { + "select": ["user1", "user2"], + "insert": ["admin_user"], + "update": ["power_user"], + } + model_config = ModelConfig( + name="test_model", + sql="SELECT 1 as id", + grants=grants_config, + path=Path("test_model.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = DuckDbConfig(name="target", schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + model_grants = sqlmesh_model.grants + assert model_grants == grants_config + + assert sqlmesh_model.grants_target_layer == GrantsTargetLayer.default + + +def test_model_grants_empty_permissions() -> None: + model_config = ModelConfig( + name="test_model_empty", + sql="SELECT 1 as id", + grants={"select": [], "insert": ["admin_user"]}, + path=Path("test_model_empty.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = DuckDbConfig(name="target", schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + model_grants = sqlmesh_model.grants + expected_grants = {"select": [], "insert": ["admin_user"]} + assert model_grants == expected_grants + + +def test_model_no_grants() -> None: + model_config = ModelConfig( + name="test_model_no_grants", + sql="SELECT 1 as id", + path=Path("test_model_no_grants.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = DuckDbConfig(name="target", 
schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + grants_config = sqlmesh_model.grants + assert grants_config is None + + +def test_model_empty_grants() -> None: + model_config = ModelConfig( + name="test_model_empty_grants", + sql="SELECT 1 as id", + grants={}, + path=Path("test_model_empty_grants.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = DuckDbConfig(name="target", schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + grants_config = sqlmesh_model.grants + assert grants_config is None + + +def test_model_grants_valid_special_characters() -> None: + valid_grantees = [ + "user@domain.com", + "service-account@project.iam.gserviceaccount.com", + "group:analysts", + '"quoted user"', + "`backtick user`", + "user_with_underscores", + "user.with.dots", + ] + + model_config = ModelConfig( + name="test_model_special_chars", + sql="SELECT 1 as id", + grants={"select": valid_grantees}, + path=Path("test_model.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = DuckDbConfig(name="target", schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + grants_config = sqlmesh_model.grants + assert grants_config is not None + assert "select" in grants_config + assert grants_config["select"] == valid_grantees + + +def test_model_grants_engine_specific_bigquery() -> None: + model_config = ModelConfig( + name="test_model_bigquery", + sql="SELECT 1 as id", + grants={ + "bigquery.dataviewer": ["user@domain.com"], + "select": ["analyst@company.com"], + }, + path=Path("test_model.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = BigQueryConfig( + name="bigquery_target", + project="test-project", + dataset="test_dataset", + location="US", + database="test-project", + schema="test_dataset", + ) + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + grants_config = sqlmesh_model.grants + assert grants_config is not None + assert grants_config["bigquery.dataviewer"] == ["user@domain.com"] + assert grants_config["select"] == ["analyst@company.com"] + + +def test_ephemeral_model_ignores_grants() -> None: + """Test that ephemeral models ignore grants configuration.""" + model_config = ModelConfig( + name="ephemeral_model", + sql="SELECT 1 as id", + materialized="ephemeral", + grants={"select": ["reporter", "analyst"]}, + path=Path("ephemeral_model.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = DuckDbConfig(name="target", schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + assert sqlmesh_model.kind.is_embedded + assert sqlmesh_model.grants is None # grants config is skipped for ephemeral / embedded models From 93c7a10c1892e41b56360ccae76526ce41025c41 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Fri, 3 Oct 2025 11:41:12 +0300 Subject: [PATCH 045/173] Chore: make console width deterministic in tests (#5477) --- tests/cli/test_cli.py | 6 ++++-- tests/utils/test_helpers.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/cli/test_cli.py 
b/tests/cli/test_cli.py
index 1be44e18f9..d2df451fef 100644
--- a/tests/cli/test_cli.py
+++ b/tests/cli/test_cli.py
@@ -32,7 +32,7 @@ def mock_runtime_env(monkeypatch):
 
 @pytest.fixture(scope="session")
 def runner() -> CliRunner:
-    return CliRunner()
+    return CliRunner(env={"COLUMNS": "80"})
 
 
 @contextmanager
@@ -1887,7 +1887,9 @@ def test_init_interactive_cli_mode_simple(runner: CliRunner, tmp_path: Path):
     assert "no_diff: true" in config_path.read_text()
 
 
-def test_init_interactive_engine_install_msg(runner: CliRunner, tmp_path: Path):
+def test_init_interactive_engine_install_msg(runner: CliRunner, tmp_path: Path, monkeypatch):
+    monkeypatch.setattr("sqlmesh.utils.rich.console.width", 80)
+
     # Engine install text should not appear for built-in engines like DuckDB
     # Input: 1 (DEFAULT template), 1 (duckdb engine), 1 (DEFAULT CLI mode)
     result = runner.invoke(
diff --git a/tests/utils/test_helpers.py b/tests/utils/test_helpers.py
index ae0742f1db..20a544512e 100644
--- a/tests/utils/test_helpers.py
+++ b/tests/utils/test_helpers.py
@@ -83,6 +83,7 @@ def test_wrapper(*args, **kwargs):
         orig_console = get_console()
         try:
             new_console = TerminalConsole()
+            new_console.console.width = 80
             new_console.console.no_color = True
             set_console(new_console)
             func(*args, **kwargs)

From 683133a7bae7b664059e58e02f06e7c96efc58c7 Mon Sep 17 00:00:00 2001
From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com>
Date: Fri, 3 Oct 2025 17:47:38 +0300
Subject: [PATCH 046/173] Chore: Fix flaky test by unlinking dbt msgpack for deterministic behaviour (#5479)

---
 tests/dbt/cli/test_run.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/dbt/cli/test_run.py b/tests/dbt/cli/test_run.py
index 7aeb8dd4d7..755553bb57 100644
--- a/tests/dbt/cli/test_run.py
+++ b/tests/dbt/cli/test_run.py
@@ -65,6 +65,12 @@ def test_run_with_changes_and_full_refresh(
         "select a, b, 'changed' as c from {{ ref('model_a') }}"
     )
 
+    # Clear dbt's partial parse cache to ensure file changes are detected
+    # Without it dbt may use stale cached model definitions, causing flakiness
+    partial_parse_file = project_path / "target" / "sqlmesh_partial_parse.msgpack"
+    if partial_parse_file.exists():
+        partial_parse_file.unlink()
+
     # run with --full-refresh. this should:
     # - fully refresh model_a (pick up the new records from external_table)
     # - deploy the local change to model_b (introducing the 'changed' column)

From 285b8f228b146dcce499be78f3b6708962ba53c5 Mon Sep 17 00:00:00 2001
From: Tori Wei <41123940+toriwei@users.noreply.github.com>
Date: Fri, 3 Oct 2025 09:53:44 -0700
Subject: [PATCH 047/173] fix: parse column from cast expression for ScdType2 models (#5475)

---
 sqlmesh/dbt/model.py             | 16 ++++++++++++++++
 tests/dbt/test_transformation.py | 17 +++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/sqlmesh/dbt/model.py b/sqlmesh/dbt/model.py
index f21eefe95d..09c410561d 100644
--- a/sqlmesh/dbt/model.py
+++ b/sqlmesh/dbt/model.py
@@ -172,6 +172,22 @@ def _validate_check_cols(cls, v: t.Union[str, t.List[str]]) -> t.Union[str, t.Li
             return "*"
         return ensure_list(v)
 
+    @field_validator("updated_at", mode="before")
+    @classmethod
+    def _validate_updated_at(cls, v: t.Optional[str]) -> t.Optional[str]:
+        """
+        Extract column name if updated_at contains a cast.
+
+        SCDType2ByTimeKind and SCDType2ByColumnKind expect a column, and the casting is done later.
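+        For example, "updated_at::timestamp" is reduced to the column name "updated_at".
+        """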
+ """ + if v is None: + return None + parsed = d.parse_one(v) + if isinstance(parsed, exp.Cast) and isinstance(parsed.this, exp.Column): + return parsed.this.name + + return v + @field_validator("sql", mode="before") @classmethod def _validate_sql(cls, v: t.Union[str, SqlStr]) -> SqlStr: diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py index a33e3ed843..141c160e7e 100644 --- a/tests/dbt/test_transformation.py +++ b/tests/dbt/test_transformation.py @@ -652,6 +652,23 @@ def test_model_kind(): == ManagedKind() ) + assert ModelConfig( + materialized=Materialization.SNAPSHOT, + unique_key=["id"], + updated_at="updated_at::timestamp", + strategy="timestamp", + dialect="redshift", + ).model_kind(context) == SCDType2ByTimeKind( + unique_key=["id"], + valid_from_name="dbt_valid_from", + valid_to_name="dbt_valid_to", + updated_at_as_valid_from=True, + updated_at_name="updated_at", + dialect="redshift", + on_destructive_change=OnDestructiveChange.IGNORE, + on_additive_change=OnAdditiveChange.ALLOW, + ) + def test_model_kind_snapshot_bigquery(): context = DbtContext() From 35269c906b3c08cbc3ab4377df66cbb611c1bd49 Mon Sep 17 00:00:00 2001 From: Max Mykal Date: Fri, 3 Oct 2025 14:21:37 -0700 Subject: [PATCH 048/173] fix(web_common): make styling in some components more flexible (#5482) --- .../LineageColumnLevel/FactoryColumn.css | 31 ++++ .../LineageColumnLevel/FactoryColumn.tsx | 15 +- .../Lineage/LineageControlButton.tsx | 2 +- .../components/Lineage/LineageControlIcon.tsx | 1 + .../src/components/Lineage/LineageLayout.tsx | 1 + .../src/components/Lineage/help.test.ts | 142 ------------------ web/common/src/components/Lineage/help.ts | 41 ----- .../components/Lineage/node/NodeAppendix.tsx | 1 + .../src/components/Lineage/node/NodeBadge.tsx | 3 +- .../components/Lineage/node/NodeDetail.tsx | 1 + .../components/Lineage/node/NodeDivider.tsx | 7 +- .../components/Lineage/node/NodeHandle.tsx | 1 + .../Lineage/node/NodeHandleIcon.tsx | 1 + .../components/Lineage/node/NodeHeader.tsx | 1 + .../src/components/Metadata/Metadata.tsx | 2 +- .../components/VirtualList/FilterableList.css | 7 + .../components/VirtualList/FilterableList.tsx | 2 +- 17 files changed, 67 insertions(+), 192 deletions(-) create mode 100644 web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.css diff --git a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.css b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.css new file mode 100644 index 0000000000..d6eea6674a --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.css @@ -0,0 +1,31 @@ +:root { + --color-lineage-model-column-badge-background: var( + --color-lineage-node-badge-background + ); + --color-lineage-model-column-badge-foreground: var( + --color-lineage-node-badge-foreground + ); + + --color-lineage-model-column-metadata-label: var(--color-metadata-label); + --color-lineage-model-column-metadata-value: var(--color-metadata-value); + + --color-lineage-model-column-information-info: var(--color-information-info); +} + +.FactoryColumn__Metadata { + --color-metadata-label: var(--color-lineage-model-column-metadata-label); + --color-metadata-value: var(--color-lineage-model-column-metadata-value); +} + +.FactoryColumn__NodeBadge { + --color-lineage-node-badge-background: var( + --color-lineage-model-column-badge-background + ); + --color-lineage-node-badge-foreground: var( + --color-lineage-model-column-badge-foreground + ); +} + +.FactoryColumn__Information { + 
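+  /* remap the generic typography token to the column-scoped information color */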
--color-typography-info: var(--color-lineage-model-column-information-info); +} diff --git a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx index 7b5e9e0ae0..19b73c3ef6 100644 --- a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx +++ b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx @@ -21,6 +21,8 @@ import { HorizontalContainer } from '@/components/HorizontalContainer/Horizontal import { Information } from '@/components/Typography/Information' import { LoadingContainer } from '@/components/LoadingContainer/LoadingContainer' +import './FactoryColumn.css' + export function FactoryColumn< TAdjacencyListKey extends string, TAdjacencyListColumnKey extends string, @@ -184,7 +186,7 @@ export function FactoryColumn< function renderColumn() { return ( {renderColumnStates()} {description ? ( - + ) : ( @@ -205,9 +210,11 @@ export function FactoryColumn< } - value={{type}} + value={ + {type} + } className={cn( - 'relative overflow-visible group p-0', + 'FactoryColumn__Metadata relative overflow-visible group p-0', isDisabledColumn && 'cursor-not-allowed', className, )} diff --git a/web/common/src/components/Lineage/LineageControlButton.tsx b/web/common/src/components/Lineage/LineageControlButton.tsx index 5f1abaa952..6f66f90db7 100644 --- a/web/common/src/components/Lineage/LineageControlButton.tsx +++ b/web/common/src/components/Lineage/LineageControlButton.tsx @@ -23,7 +23,7 @@ export function LineageControlButton({ delayDuration={0} className="px-2 py-1 text-xs rounded-sm font-semibold bg-lineage-control-button-tooltip-background text-lineage-control-button-tooltip-foreground" trigger={ -
+
{ }) }) - describe('getTransformedModelEdges', () => { - test('should transform edges using the provided transform function', () => { - const adjacencyListKeys = ['model1', 'model2', 'model3'] - const lineageAdjacencyList: LineageAdjacencyList = { - model1: ['model2', 'model3'], - model2: ['model3'], - model3: [], - } - - const transformEdge = ( - type: string, - edgeId: EdgeId, - sourceId: NodeId, - targetId: NodeId, - ) => ({ - id: edgeId, - source: sourceId, - target: targetId, - type, - zIndex: 1, - }) - - const result = getTransformedModelEdges( - adjacencyListKeys, - lineageAdjacencyList, - transformEdge, - ) - - expect(result).toHaveLength(3) - - const model1Id = toNodeID('model1') - const model2Id = toNodeID('model2') - const model3Id = toNodeID('model3') - - expect(result[0]).toEqual({ - id: toEdgeID('model1', 'model2'), - source: model1Id, - target: model2Id, - type: 'edge', - zIndex: 1, - }) - expect(result[1]).toEqual({ - id: toEdgeID('model1', 'model3'), - source: model1Id, - target: model3Id, - type: 'edge', - zIndex: 1, - }) - expect(result[2]).toEqual({ - id: toEdgeID('model2', 'model3'), - source: model2Id, - target: model3Id, - type: 'edge', - zIndex: 1, - }) - }) - - test('should skip edges where target is not in adjacency list', () => { - const adjacencyListKeys = ['model1'] - const lineageAdjacencyList: LineageAdjacencyList = { - model1: ['model2'], // model2 is not in the adjacency list - } - - const transformEdge = ( - type: string, - edgeId: EdgeId, - sourceId: NodeId, - targetId: NodeId, - ) => ({ - id: edgeId, - source: sourceId, - target: targetId, - type, - zIndex: 1, - }) - - const result = getTransformedModelEdges( - adjacencyListKeys, - lineageAdjacencyList, - transformEdge, - ) - - expect(result).toHaveLength(0) - }) - - test('should handle empty adjacency list', () => { - const adjacencyListKeys: string[] = [] - const lineageAdjacencyList: LineageAdjacencyList = {} - - const transformEdge = ( - type: string, - edgeId: EdgeId, - sourceId: NodeId, - targetId: NodeId, - ) => ({ - id: edgeId, - source: sourceId, - target: targetId, - type, - zIndex: 1, - }) - - const result = getTransformedModelEdges( - adjacencyListKeys, - lineageAdjacencyList, - transformEdge, - ) - - expect(result).toHaveLength(0) - }) - - test('should handle nodes with no targets', () => { - const adjacencyListKeys = ['model1', 'model2'] - const lineageAdjacencyList = { - model1: [], - model2: null, - } as unknown as LineageAdjacencyList - - const transformEdge = ( - type: string, - edgeId: EdgeId, - sourceId: NodeId, - targetId: NodeId, - ) => ({ - id: edgeId, - source: sourceId, - target: targetId, - type, - zIndex: 1, - }) - - const result = getTransformedModelEdges( - adjacencyListKeys, - lineageAdjacencyList, - transformEdge, - ) - - expect(result).toHaveLength(0) - }) - }) - describe('getTransformedModelEdgesSourceTargets', () => { test('should transform edges from source to targets using the provided transform function', () => { const adjacencyListKeys = ['model1', 'model2', 'model3'] diff --git a/web/common/src/components/Lineage/help.ts b/web/common/src/components/Lineage/help.ts index 1e5d5a9d6b..97f4ad9542 100644 --- a/web/common/src/components/Lineage/help.ts +++ b/web/common/src/components/Lineage/help.ts @@ -57,47 +57,6 @@ export function getTransformedNodes< return nodesMap } -export function getTransformedModelEdges< - TAdjacencyListKey extends string, - TEdgeData extends LineageEdgeData = LineageEdgeData, - TNodeID extends string = NodeId, - TEdgeID extends string = EdgeId, 
- TPortID extends string = PortId, ->( - adjacencyListKeys: TAdjacencyListKey[], - lineageAdjacencyList: LineageAdjacencyList, - transformEdge: TransformEdgeFn, -) { - const nodesCount = adjacencyListKeys.length - - if (nodesCount === 0) return [] - - const edges = [] - - for (let i = 0; i < nodesCount; i++) { - const adjacencyListKey = adjacencyListKeys[i] - const nodeId = toNodeID(adjacencyListKey) - const targets = lineageAdjacencyList[adjacencyListKey] - const targetsCount = targets?.length || 0 - - if (targets == null || targetsCount < 1) continue - - for (let j = 0; j < targetsCount; j++) { - const target = targets[j] - - if (!(target in lineageAdjacencyList)) continue - - const edgeId = toEdgeID(adjacencyListKey, target) - - edges.push( - transformEdge('edge', edgeId, nodeId, toNodeID(target)), - ) - } - } - - return edges -} - export function getTransformedModelEdgesSourceTargets< TAdjacencyListKey extends string, TEdgeData extends LineageEdgeData = LineageEdgeData, diff --git a/web/common/src/components/Lineage/node/NodeAppendix.tsx b/web/common/src/components/Lineage/node/NodeAppendix.tsx index 76d64affed..5a703a468f 100644 --- a/web/common/src/components/Lineage/node/NodeAppendix.tsx +++ b/web/common/src/components/Lineage/node/NodeAppendix.tsx @@ -32,6 +32,7 @@ export const NodeAppendix = forwardRef( return (
diff --git a/web/common/src/components/Lineage/node/NodeBadge.tsx b/web/common/src/components/Lineage/node/NodeBadge.tsx index 943e5e9267..8c894ecca2 100644 --- a/web/common/src/components/Lineage/node/NodeBadge.tsx +++ b/web/common/src/components/Lineage/node/NodeBadge.tsx @@ -8,8 +8,9 @@ export const NodeBadge = React.forwardRef( return ( {hasDivider && } + return ( +
+ ) } diff --git a/web/common/src/components/Lineage/node/NodeHandle.tsx b/web/common/src/components/Lineage/node/NodeHandle.tsx index e737ff4327..4bfbfa6181 100644 --- a/web/common/src/components/Lineage/node/NodeHandle.tsx +++ b/web/common/src/components/Lineage/node/NodeHandle.tsx @@ -18,6 +18,7 @@ export const NodeHandle = React.memo(function NodeHandle({ }) { return ( ( ({ className, ...props }, ref) => { return (
( ref={ref} data-component="Metadata" className={cn( - 'justify-between gap-2 items-center whitespace-nowrap h-auto', + 'Metadata justify-between gap-2 items-center whitespace-nowrap h-auto', className, )} {...props} diff --git a/web/common/src/components/VirtualList/FilterableList.css b/web/common/src/components/VirtualList/FilterableList.css index 4dfdd87eea..3b1a8f8c3d 100644 --- a/web/common/src/components/VirtualList/FilterableList.css +++ b/web/common/src/components/VirtualList/FilterableList.css @@ -7,3 +7,10 @@ --color-filterable-list-input-placeholder: var(--color-input-placeholder); --color-filterable-list-input-border: var(--color-input-border); } + +.FilterableList__Input { + --color-input-background: var(--color-filterable-list-input-background); + --color-input-foreground: var(--color-filterable-list-input-foreground); + --color-input-placeholder: var(--color-filterable-list-input-placeholder); + --color-input-border: var(--color-filterable-list-input-border); +} diff --git a/web/common/src/components/VirtualList/FilterableList.tsx b/web/common/src/components/VirtualList/FilterableList.tsx index 5ea0d35039..d22bfca784 100644 --- a/web/common/src/components/VirtualList/FilterableList.tsx +++ b/web/common/src/components/VirtualList/FilterableList.tsx @@ -57,7 +57,7 @@ export function FilterableList({ setSearch(e.target.value) } inputSize="xs" - className="w-full" + className="FilterableList__Input w-full" /> Date: Fri, 3 Oct 2025 15:51:19 -0700 Subject: [PATCH 049/173] fix(web_common): fix layout update (#5483) --- web/common/src/components/Lineage/LineageLayout.tsx | 2 +- web/common/src/components/ModelName/ModelName.css | 2 ++ web/common/src/components/ModelName/ModelName.tsx | 2 +- web/common/tailwind.base.config.js | 4 ++++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/web/common/src/components/Lineage/LineageLayout.tsx b/web/common/src/components/Lineage/LineageLayout.tsx index 7340fa2656..2ad31c4b9e 100644 --- a/web/common/src/components/Lineage/LineageLayout.tsx +++ b/web/common/src/components/Lineage/LineageLayout.tsx @@ -348,7 +348,7 @@ function LineageLayoutBase< }) } } - }, [currentNode?.id, setSelectedNodeId, nodes, setCenter]) + }, [currentNode?.id, setSelectedNodeId, setCenter]) return ( ( size="2xs" variant="transparent" text={name} - className="ml-2 w-6 hover:text-model-name-copy-icon-hover active:text-model-name-copy-icon-hover" + className="ml-2 w-6 hover:text-model-name-copy-icon-hover active:text-model-name-copy-icon-hover bg-model-name-copy-icon-background hover:bg-model-name-copy-icon-background-hover active:bg-model-name-copy-icon-background-hover" > {copied => copied ? 
( diff --git a/web/common/tailwind.base.config.js b/web/common/tailwind.base.config.js index 49354591cc..8f385b53dc 100644 --- a/web/common/tailwind.base.config.js +++ b/web/common/tailwind.base.config.js @@ -71,6 +71,10 @@ export default { model: 'var(--color-model-name-model)', 'copy-icon': 'var(--color-model-name-copy-icon)', 'copy-icon-hover': 'var(--color-model-name-copy-icon-hover)', + 'copy-icon-background': + 'var(--color-model-name-copy-icon-background)', + 'copy-icon-background-hover': + 'var(--color-model-name-copy-icon-background-hover)', }, badge: { background: 'var(--color-badge-background)', From 223422fca0ab324630ac63c5e14a8a0434035250 Mon Sep 17 00:00:00 2001 From: Max Mykal Date: Mon, 6 Oct 2025 10:30:24 -0700 Subject: [PATCH 050/173] fix(web_common): clean up lineage layout component (#5487) --- .../src/components/Lineage/LineageContext.ts | 4 - .../src/components/Lineage/LineageLayout.tsx | 404 +----------------- .../components/Lineage/LineageLayoutBase.tsx | 367 ++++++++++++++++ .../Lineage/LineageLayoutContainer.tsx | 43 ++ .../Lineage/stories/ModelLineage.tsx | 2 - .../components/VirtualList/FilterableList.tsx | 3 + 6 files changed, 432 insertions(+), 391 deletions(-) create mode 100644 web/common/src/components/Lineage/LineageLayoutBase.tsx create mode 100644 web/common/src/components/Lineage/LineageLayoutContainer.tsx diff --git a/web/common/src/components/Lineage/LineageContext.ts b/web/common/src/components/Lineage/LineageContext.ts index 6f4ee7e165..9da54dcbee 100644 --- a/web/common/src/components/Lineage/LineageContext.ts +++ b/web/common/src/components/Lineage/LineageContext.ts @@ -30,8 +30,6 @@ export interface LineageContextValue< setSelectedNodeId: React.Dispatch> // Layout - isBuildingLayout: boolean - setIsBuildingLayout: React.Dispatch> zoom: number setZoom: React.Dispatch> @@ -66,8 +64,6 @@ export function getInitial< nodes: [], nodesMap: {}, setNodesMap: () => {}, - isBuildingLayout: false, - setIsBuildingLayout: () => {}, currentNode: null, } } diff --git a/web/common/src/components/Lineage/LineageLayout.tsx b/web/common/src/components/Lineage/LineageLayout.tsx index 2ad31c4b9e..e01e8ae9e9 100644 --- a/web/common/src/components/Lineage/LineageLayout.tsx +++ b/web/common/src/components/Lineage/LineageLayout.tsx @@ -1,52 +1,25 @@ import { - Background, - BackgroundVariant, - Controls, - type EdgeChange, type EdgeTypes, - type NodeChange, type NodeTypes, - ReactFlow, ReactFlowProvider, type SetCenter, - getConnectedEdges, - getIncomers, - getOutgoers, - useReactFlow, - useViewport, - applyNodeChanges, - applyEdgeChanges, } from '@xyflow/react' -import '@xyflow/react/dist/style.css' -import './Lineage.css' - -import { debounce } from 'lodash' -import { CircuitBoard, Crosshair, LocateFixed, RotateCcw } from 'lucide-react' import React from 'react' -import { cn } from '@/utils' import { type LineageContextHook } from './LineageContext' -import { LineageControlButton } from './LineageControlButton' -import { LineageControlIcon } from './LineageControlIcon' + import { - DEFAULT_ZOOM, - type LineageEdge, type LineageEdgeData, type LineageNode, type LineageNodeData, - MAX_ZOOM, - MIN_ZOOM, - NODES_TRESHOLD, - NODES_TRESHOLD_ZOOM, type NodeId, type EdgeId, - ZOOM_THRESHOLD, type PortId, } from './utils' -import { VerticalContainer } from '../VerticalContainer/VerticalContainer' -import { MessageContainer } from '../MessageContainer/MessageContainer' -import { LoadingContainer } from '../LoadingContainer/LoadingContainer' + +import { LineageLayoutBase } from 
'./LineageLayoutBase' +import { LineageLayoutContainer } from './LineageLayoutContainer' export function LineageLayout< TNodeData extends LineageNodeData = LineageNodeData, @@ -61,6 +34,7 @@ export function LineageLayout< controls, nodesDraggable, nodesConnectable, + isBuildingLayout, useLineage, onNodeClick, onNodeDoubleClick, @@ -72,6 +46,7 @@ export function LineageLayout< TEdgeID, TPortID > + isBuildingLayout?: boolean nodeTypes?: NodeTypes edgeTypes?: EdgeTypes className?: string @@ -91,360 +66,19 @@ export function LineageLayout< }) { return ( - + + + ) } - -function LineageLayoutBase< - TNodeData extends LineageNodeData = LineageNodeData, - TEdgeData extends LineageEdgeData = LineageEdgeData, - TNodeID extends string = NodeId, - TEdgeID extends string = EdgeId, - TPortID extends string = PortId, ->({ - nodeTypes, - edgeTypes, - className, - controls, - nodesDraggable = false, - nodesConnectable = false, - useLineage, - onNodeClick, - onNodeDoubleClick, -}: { - useLineage: LineageContextHook< - TNodeData, - TEdgeData, - TNodeID, - TEdgeID, - TPortID - > - nodesDraggable?: boolean - nodesConnectable?: boolean - nodeTypes?: NodeTypes - edgeTypes?: EdgeTypes - className?: string - controls?: - | React.ReactNode - | (({ setCenter }: { setCenter: SetCenter }) => React.ReactNode) - onNodeClick?: ( - event: React.MouseEvent, - node: LineageNode, - ) => void - onNodeDoubleClick?: ( - event: React.MouseEvent, - node: LineageNode, - ) => void -}) { - const { zoom: viewportZoom } = useViewport() - const { setCenter } = useReactFlow() - - const { - isBuildingLayout, - currentNode, - zoom, - nodes: initialNodes, - edges: initialEdges, - nodesMap, - showOnlySelectedNodes, - selectedNodeId, - setZoom, - setSelectedNodeId, - setShowOnlySelectedNodes, - setSelectedNodes, - setSelectedEdges, - } = useLineage() - - const [nodes, setNodes] = React.useState(initialNodes) - const [edges, setEdges] = React.useState(initialEdges) - - const onNodesChange = React.useCallback( - (changes: NodeChange>[]) => { - setNodes( - applyNodeChanges>(changes, nodes), - ) - }, - [nodes, setNodes], - ) - - const onEdgesChange = React.useCallback( - ( - changes: EdgeChange>[], - ) => { - setEdges( - applyEdgeChanges>( - changes, - edges, - ), - ) - }, - [edges, setEdges], - ) - - const updateZoom = React.useMemo(() => debounce(setZoom, 200), [setZoom]) - - const zoomToCurrentNode = React.useCallback( - (zoom: number = DEFAULT_ZOOM) => { - if (currentNode) { - setCenter(currentNode.position.x, currentNode.position.y, { - zoom, - duration: 0, - }) - } - }, - [currentNode, setCenter], - ) - - const zoomToSelectedNode = React.useCallback( - (zoom: number = DEFAULT_ZOOM) => { - const node = selectedNodeId ? 
nodesMap[selectedNodeId] : null - if (node) { - setCenter(node.position.x, node.position.y, { - zoom, - duration: 0, - }) - } - }, - [nodesMap, selectedNodeId, setCenter], - ) - - const getAllIncomers = React.useCallback( - ( - node: LineageNode, - visited: Set = new Set(), - ): LineageNode[] => { - if (visited.has(node.id)) return [] - - visited.add(node.id) - - return Array.from( - new Set>([ - node, - ...getIncomers(node, nodes, edges) - .map(n => getAllIncomers(n, visited)) - .flat(), - ]), - ) - }, - [nodes, edges], - ) - - const getAllOutgoers = React.useCallback( - ( - node: LineageNode, - visited: Set = new Set(), - ): LineageNode[] => { - if (visited.has(node.id)) return [] - - visited.add(node.id) - - return Array.from( - new Set>([ - node, - ...getOutgoers(node, nodes, edges) - .map(n => getAllOutgoers(n, visited)) - .flat(), - ]), - ) - }, - [nodes, edges], - ) - - React.useEffect(() => { - setNodes(initialNodes) - }, [initialNodes]) - - React.useEffect(() => { - setEdges(initialEdges) - }, [initialEdges]) - - React.useEffect(() => { - if (selectedNodeId == null) { - setShowOnlySelectedNodes(false) - setSelectedNodes(new Set()) - setSelectedEdges(new Set()) - - return - } - - const node = selectedNodeId ? nodesMap[selectedNodeId] : null - - if (node == null) { - setSelectedNodeId(null) - return - } - - const incomers = getAllIncomers(node) - const outgoers = getAllOutgoers(node) - const connectedNodes = [...incomers, ...outgoers] - - if (currentNode) { - connectedNodes.push(currentNode) - } - - const connectedEdges = getConnectedEdges< - LineageNode, - LineageEdge - >(connectedNodes, edges) - const selectedNodes = new Set(connectedNodes.map(node => node.id)) - const selectedEdges = new Set( - connectedEdges.reduce((acc, edge) => { - if ([edge.source, edge.target].every(id => selectedNodes.has(id))) { - edge.zIndex = 2 - acc.add(edge.id) - } else { - edge.zIndex = 1 - } - return acc - }, new Set()), - ) - - setSelectedNodes(selectedNodes) - setSelectedEdges(selectedEdges) - }, [ - currentNode, - selectedNodeId, - setSelectedNodes, - setSelectedEdges, - getAllIncomers, - getAllOutgoers, - setShowOnlySelectedNodes, - setSelectedNodeId, - ]) - - React.useEffect(() => { - if (selectedNodeId) { - zoomToSelectedNode(zoom) - } else { - zoomToCurrentNode(zoom) - } - }, [zoomToCurrentNode, zoomToSelectedNode]) - - React.useEffect(() => { - updateZoom(viewportZoom) - }, [updateZoom, viewportZoom]) - - React.useEffect(() => { - if (currentNode?.id) { - setSelectedNodeId(currentNode.id) - } else { - const node = nodes.length > 0 ? nodes[nodes.length - 1] : null - - if (node) { - setCenter(node.position.x, node.position.y, { - zoom: zoom, - duration: 0, - }) - } - } - }, [currentNode?.id, setSelectedNodeId, setCenter]) - - return ( - - {isBuildingLayout && ( - - - Building layout... - - - )} - , - LineageEdge - > - className="shrink-0" - nodes={nodes} - edges={edges} - nodeTypes={nodeTypes} - edgeTypes={edgeTypes} - onNodesChange={onNodesChange} - onEdgesChange={onEdgesChange} - nodesDraggable={nodesDraggable} - nodesConnectable={nodesConnectable} - zoomOnDoubleClick={false} - panOnScroll={true} - zoomOnScroll={true} - minZoom={nodes.length > NODES_TRESHOLD ? 
NODES_TRESHOLD_ZOOM : MIN_ZOOM} - maxZoom={MAX_ZOOM} - fitView={false} - nodeOrigin={[0.5, 0.5]} - onlyRenderVisibleElements - onNodeClick={onNodeClick} - onNodeDoubleClick={onNodeDoubleClick} - > - {zoom > ZOOM_THRESHOLD && ( - - )} - - {currentNode && ( - zoomToCurrentNode(DEFAULT_ZOOM)} - disabled={isBuildingLayout} - > - - - )} - {selectedNodeId && ( - <> - setShowOnlySelectedNodes(!showOnlySelectedNodes)} - disabled={isBuildingLayout} - > - - - zoomToSelectedNode(DEFAULT_ZOOM)} - disabled={isBuildingLayout} - > - - - - )} - {controls && typeof controls === 'function' - ? controls({ setCenter }) - : controls} - - - - ) -} diff --git a/web/common/src/components/Lineage/LineageLayoutBase.tsx b/web/common/src/components/Lineage/LineageLayoutBase.tsx new file mode 100644 index 0000000000..af47a82b29 --- /dev/null +++ b/web/common/src/components/Lineage/LineageLayoutBase.tsx @@ -0,0 +1,367 @@ +import { + Background, + BackgroundVariant, + Controls, + type EdgeChange, + type EdgeTypes, + type NodeChange, + type NodeTypes, + ReactFlow, + type SetCenter, + getConnectedEdges, + getIncomers, + getOutgoers, + useReactFlow, + useViewport, + applyNodeChanges, + applyEdgeChanges, +} from '@xyflow/react' + +import '@xyflow/react/dist/style.css' +import './Lineage.css' + +import { debounce } from 'lodash' +import { CircuitBoard, Crosshair, LocateFixed, RotateCcw } from 'lucide-react' +import React from 'react' + +import { type LineageContextHook } from './LineageContext' +import { LineageControlButton } from './LineageControlButton' +import { LineageControlIcon } from './LineageControlIcon' +import { + DEFAULT_ZOOM, + type LineageEdge, + type LineageEdgeData, + type LineageNode, + type LineageNodeData, + MAX_ZOOM, + MIN_ZOOM, + NODES_TRESHOLD, + NODES_TRESHOLD_ZOOM, + type NodeId, + type EdgeId, + ZOOM_THRESHOLD, + type PortId, +} from './utils' + +import '@xyflow/react/dist/style.css' +import './Lineage.css' +import { cn } from '@/utils' + +export function LineageLayoutBase< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +>({ + nodeTypes, + edgeTypes, + className, + controls, + nodesDraggable = false, + nodesConnectable = false, + useLineage, + onNodeClick, + onNodeDoubleClick, +}: { + useLineage: LineageContextHook< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TPortID + > + nodesDraggable?: boolean + nodesConnectable?: boolean + nodeTypes?: NodeTypes + edgeTypes?: EdgeTypes + className?: string + controls?: + | React.ReactNode + | (({ setCenter }: { setCenter: SetCenter }) => React.ReactNode) + onNodeClick?: ( + event: React.MouseEvent, + node: LineageNode, + ) => void + onNodeDoubleClick?: ( + event: React.MouseEvent, + node: LineageNode, + ) => void +}) { + const { zoom: viewportZoom } = useViewport() + const { setCenter } = useReactFlow() + + const { + currentNode, + zoom, + nodes: initialNodes, + edges: initialEdges, + nodesMap, + showOnlySelectedNodes, + selectedNodeId, + setZoom, + setSelectedNodeId, + setShowOnlySelectedNodes, + setSelectedNodes, + setSelectedEdges, + } = useLineage() + + const [nodes, setNodes] = React.useState(initialNodes) + const [edges, setEdges] = React.useState(initialEdges) + + const onNodesChange = React.useCallback( + (changes: NodeChange>[]) => { + setNodes( + applyNodeChanges>(changes, nodes), + ) + }, + [nodes, setNodes], + ) + + const onEdgesChange = React.useCallback( + ( + changes: 
EdgeChange>[], + ) => { + setEdges( + applyEdgeChanges>( + changes, + edges, + ), + ) + }, + [edges, setEdges], + ) + + const updateZoom = React.useMemo(() => debounce(setZoom, 200), [setZoom]) + + const zoomToCurrentNode = React.useCallback( + (zoom: number = DEFAULT_ZOOM) => { + if (currentNode) { + setCenter(currentNode.position.x, currentNode.position.y, { + zoom, + duration: 0, + }) + } + }, + [currentNode, setCenter], + ) + + const zoomToSelectedNode = React.useCallback( + (zoom: number = DEFAULT_ZOOM) => { + const node = selectedNodeId ? nodesMap[selectedNodeId] : null + if (node) { + setCenter(node.position.x, node.position.y, { + zoom, + duration: 0, + }) + } + }, + [nodesMap, selectedNodeId, setCenter], + ) + + const getAllIncomers = React.useCallback( + ( + node: LineageNode, + visited: Set = new Set(), + ): LineageNode[] => { + if (visited.has(node.id)) return [] + + visited.add(node.id) + + return Array.from( + new Set>([ + node, + ...getIncomers(node, nodes, edges) + .map(n => getAllIncomers(n, visited)) + .flat(), + ]), + ) + }, + [nodes, edges], + ) + + const getAllOutgoers = React.useCallback( + ( + node: LineageNode, + visited: Set = new Set(), + ): LineageNode[] => { + if (visited.has(node.id)) return [] + + visited.add(node.id) + + return Array.from( + new Set>([ + node, + ...getOutgoers(node, nodes, edges) + .map(n => getAllOutgoers(n, visited)) + .flat(), + ]), + ) + }, + [nodes, edges], + ) + + React.useEffect(() => { + setNodes(initialNodes) + }, [initialNodes]) + + React.useEffect(() => { + setEdges(initialEdges) + }, [initialEdges]) + + React.useEffect(() => { + if (selectedNodeId == null) { + setShowOnlySelectedNodes(false) + setSelectedNodes(new Set()) + setSelectedEdges(new Set()) + + return + } + + const node = selectedNodeId ? nodesMap[selectedNodeId] : null + + if (node == null) { + setSelectedNodeId(null) + return + } + + const incomers = getAllIncomers(node) + const outgoers = getAllOutgoers(node) + const connectedNodes = [...incomers, ...outgoers] + + if (currentNode) { + connectedNodes.push(currentNode) + } + + const connectedEdges = getConnectedEdges< + LineageNode, + LineageEdge + >(connectedNodes, edges) + const selectedNodes = new Set(connectedNodes.map(node => node.id)) + const selectedEdges = new Set( + connectedEdges.reduce((acc, edge) => { + if ([edge.source, edge.target].every(id => selectedNodes.has(id))) { + edge.zIndex = 2 + acc.add(edge.id) + } else { + edge.zIndex = 1 + } + return acc + }, new Set()), + ) + + setSelectedNodes(selectedNodes) + setSelectedEdges(selectedEdges) + }, [ + currentNode, + selectedNodeId, + setSelectedNodes, + setSelectedEdges, + getAllIncomers, + getAllOutgoers, + setShowOnlySelectedNodes, + setSelectedNodeId, + ]) + + React.useEffect(() => { + if (selectedNodeId) { + zoomToSelectedNode(zoom) + } else { + zoomToCurrentNode(zoom) + } + }, [zoomToCurrentNode, zoomToSelectedNode]) + + React.useEffect(() => { + updateZoom(viewportZoom) + }, [updateZoom, viewportZoom]) + + React.useEffect(() => { + if (currentNode?.id) { + setSelectedNodeId(currentNode.id) + } else { + const node = nodes.length > 0 ? 
nodes[nodes.length - 1] : null + + if (node) { + setCenter(node.position.x, node.position.y, { + zoom: zoom, + duration: 0, + }) + } + } + }, [currentNode?.id, setSelectedNodeId, setCenter]) + + return ( + , + LineageEdge + > + className={cn('shrink-0', className)} + nodes={nodes} + edges={edges} + nodeTypes={nodeTypes} + edgeTypes={edgeTypes} + onNodesChange={onNodesChange} + onEdgesChange={onEdgesChange} + nodesDraggable={nodesDraggable} + nodesConnectable={nodesConnectable} + zoomOnDoubleClick={false} + panOnScroll={true} + zoomOnScroll={true} + minZoom={nodes.length > NODES_TRESHOLD ? NODES_TRESHOLD_ZOOM : MIN_ZOOM} + maxZoom={MAX_ZOOM} + fitView={false} + nodeOrigin={[0.5, 0.5]} + onlyRenderVisibleElements + onNodeClick={onNodeClick} + onNodeDoubleClick={onNodeDoubleClick} + > + {zoom > ZOOM_THRESHOLD && ( + + )} + + {currentNode && ( + zoomToCurrentNode(DEFAULT_ZOOM)} + > + + + )} + {selectedNodeId && ( + <> + setShowOnlySelectedNodes(!showOnlySelectedNodes)} + > + + + zoomToSelectedNode(DEFAULT_ZOOM)} + > + + + + )} + {controls && typeof controls === 'function' + ? controls({ setCenter }) + : controls} + + + ) +} diff --git a/web/common/src/components/Lineage/LineageLayoutContainer.tsx b/web/common/src/components/Lineage/LineageLayoutContainer.tsx new file mode 100644 index 0000000000..e3385a3294 --- /dev/null +++ b/web/common/src/components/Lineage/LineageLayoutContainer.tsx @@ -0,0 +1,43 @@ +import { cn } from '@/utils' + +import React from 'react' + +import { VerticalContainer } from '../VerticalContainer/VerticalContainer' +import { MessageContainer } from '../MessageContainer/MessageContainer' +import { LoadingContainer } from '../LoadingContainer/LoadingContainer' + +export function LineageLayoutContainer({ + isBuildingLayout, + loadingMessage = 'Building layout...', + className, + children, +}: { + isBuildingLayout?: boolean + loadingMessage?: string + className?: string + children: React.ReactNode +}) { + return ( + + {isBuildingLayout && ( + + + {loadingMessage} + + + )} + {children} + + ) +} diff --git a/web/common/src/components/Lineage/stories/ModelLineage.tsx b/web/common/src/components/Lineage/stories/ModelLineage.tsx index 2902350919..800f292c4b 100644 --- a/web/common/src/components/Lineage/stories/ModelLineage.tsx +++ b/web/common/src/components/Lineage/stories/ModelLineage.tsx @@ -359,7 +359,6 @@ export const ModelLineage = ({ selectedNodes, selectedEdges, selectedNodeId, - isBuildingLayout, zoom, edges, nodes, @@ -372,7 +371,6 @@ export const ModelLineage = ({ setSelectedNodes, setSelectedEdges, setSelectedNodeId, - setIsBuildingLayout, setZoom, setEdges, setNodesMap, diff --git a/web/common/src/components/VirtualList/FilterableList.tsx b/web/common/src/components/VirtualList/FilterableList.tsx index d22bfca784..16a243eb0e 100644 --- a/web/common/src/components/VirtualList/FilterableList.tsx +++ b/web/common/src/components/VirtualList/FilterableList.tsx @@ -58,6 +58,9 @@ export function FilterableList({ } inputSize="xs" className="FilterableList__Input w-full" + onClick={(e: React.MouseEvent) => { + e.stopPropagation() + }} /> Date: Mon, 6 Oct 2025 11:33:19 -0700 Subject: [PATCH 051/173] fix(web_common): apply className and update stories (#5488) --- web/common/src/components/Lineage/LineageLayout.tsx | 6 ++++-- .../src/components/Lineage/LineageLayoutContainer.tsx | 2 +- web/common/src/components/Lineage/stories/ModelLineage.tsx | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/web/common/src/components/Lineage/LineageLayout.tsx 
b/web/common/src/components/Lineage/LineageLayout.tsx index e01e8ae9e9..2ab4a34879 100644 --- a/web/common/src/components/Lineage/LineageLayout.tsx +++ b/web/common/src/components/Lineage/LineageLayout.tsx @@ -66,13 +66,15 @@ export function LineageLayout< }) { return ( - + + isBuildingLayout={isBuildingLayout} useLineage={useModelLineage} nodeTypes={nodeTypes} edgeTypes={edgeTypes} From a3030118220376c93f267df91d2ad7adce9a4536 Mon Sep 17 00:00:00 2001 From: Trey Spiller <1831878+treysp@users.noreply.github.com> Date: Mon, 6 Oct 2025 17:01:36 -0500 Subject: [PATCH 052/173] Fix: unique, user-friendly audit names for custom named dbt tests (#5484) --- sqlmesh/dbt/manifest.py | 64 ++++++++++++++++++- sqlmesh/dbt/test.py | 9 ++- tests/dbt/test_test.py | 132 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 201 insertions(+), 4 deletions(-) diff --git a/sqlmesh/dbt/manifest.py b/sqlmesh/dbt/manifest.py index 17c5e91700..ea2058138f 100644 --- a/sqlmesh/dbt/manifest.py +++ b/sqlmesh/dbt/manifest.py @@ -61,6 +61,7 @@ extract_call_names, jinja_call_arg_name, ) +from sqlglot.helper import ensure_list if t.TYPE_CHECKING: from dbt.contracts.graph.manifest import Macro, Manifest @@ -353,15 +354,17 @@ def _load_tests(self) -> None: ) test_model = _test_model(node) + node_config = _node_base_config(node) + node_config["name"] = _build_test_name(node, dependencies) test = TestConfig( sql=sql, model_name=test_model, test_kwargs=node.test_metadata.kwargs if hasattr(node, "test_metadata") else {}, dependencies=dependencies, - **_node_base_config(node), + **node_config, ) - self._tests_per_package[node.package_name][node.name.lower()] = test + self._tests_per_package[node.package_name][node.unique_id] = test if test_model: self._tests_by_owner[test_model].append(test) @@ -741,7 +744,12 @@ def _test_model(node: ManifestNode) -> t.Optional[str]: attached_node = getattr(node, "attached_node", None) if attached_node: pieces = attached_node.split(".") - return pieces[-1] if pieces[0] in ["model", "seed"] else None + if pieces[0] in ["model", "seed"]: + # versioned models have format "model.package.model_name.v1" (4 parts) + if len(pieces) == 4: + return f"{pieces[2]}_{pieces[3]}" + return pieces[-1] + return None key_name = getattr(node, "file_key_name", None) if key_name: @@ -798,3 +806,53 @@ def _strip_jinja_materialization_tags(materialization_jinja: str) -> str: ) return materialization_jinja.strip() + + +def _build_test_name(node: ManifestNode, dependencies: Dependencies) -> str: + """ + Build a user-friendly test name that includes the test's model/source, column, + and args for tests with custom user names. Needed because dbt only generates these + names for tests that do not specify the "name" field in their YAML definition. 
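+    For example, a custom-named not_null test on column "id" of my_model becomes
+    "custom_notnull_name_my_model_id" rather than just the bare custom name.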
+ + Name structure + - Model test: [namespace]_[test name]_[model name]_[column name]__[arg values] + - Source test: [namespace]_source_[test name]_[source name]_[table name]_[column name]__[arg values] + """ + # standalone test + if not hasattr(node, "test_metadata"): + return node.name + + model_name = _test_model(node) + source_name = None + if not model_name and dependencies.sources: + # extract source and table names + source_parts = list(dependencies.sources)[0].split(".") + source_name = "_".join(source_parts) if len(source_parts) == 2 else source_parts[-1] + entity_name = model_name or source_name or "" + entity_name = f"_{entity_name}" if entity_name else "" + + name_prefix = "" + if namespace := getattr(node.test_metadata, "namespace", None): + name_prefix += f"{namespace}_" + if source_name and not model_name: + name_prefix += "source_" + + metadata_kwargs = node.test_metadata.kwargs + arg_val_parts = [] + for arg, val in sorted(metadata_kwargs.items()): + if arg == "model": + continue + if isinstance(val, dict): + val = list(val.values()) + val = [re.sub("[^0-9a-zA-Z_]+", "_", str(v)) for v in ensure_list(val)] + arg_val_parts.extend(val) + unique_args = "__".join(arg_val_parts) if arg_val_parts else "" + unique_args = f"_{unique_args}" if unique_args else "" + + auto_name = f"{name_prefix}{node.test_metadata.name}{entity_name}{unique_args}" + + if node.name == auto_name: + return node.name + + custom_prefix = name_prefix if source_name and not model_name else "" + return f"{custom_prefix}{node.name}{entity_name}{unique_args}" diff --git a/sqlmesh/dbt/test.py b/sqlmesh/dbt/test.py index 7d8a369068..c4a32b2189 100644 --- a/sqlmesh/dbt/test.py +++ b/sqlmesh/dbt/test.py @@ -122,7 +122,14 @@ def is_standalone(self) -> bool: return True # Check if test has references to other models - other_refs = {ref for ref in self.dependencies.refs if ref != self.model_name} + # For versioned models, refs include version (e.g., "model_name_v1") but model_name may not + self_refs = {self.model_name} + for ref in self.dependencies.refs: + # versioned models end in _vX + if ref.startswith(f"{self.model_name}_v"): + self_refs.add(ref) + + other_refs = {ref for ref in self.dependencies.refs if ref not in self_refs} return bool(other_refs) @property diff --git a/tests/dbt/test_test.py b/tests/dbt/test_test.py index 845c1d2fc0..fb33220c0c 100644 --- a/tests/dbt/test_test.py +++ b/tests/dbt/test_test.py @@ -1,3 +1,7 @@ +from pathlib import Path + +import pytest + from sqlmesh.dbt.test import TestConfig @@ -8,3 +12,131 @@ def test_multiline_test_kwarg() -> None: test_kwargs={"test_field": "foo\nbar\n"}, ) assert test._kwargs() == 'test_field="foo\nbar"' + + +@pytest.mark.xdist_group("dbt_manifest") +def test_tests_get_unique_names(tmp_path: Path, create_empty_project) -> None: + from sqlmesh.utils.yaml import YAML + from sqlmesh.core.context import Context + + yaml = YAML() + project_dir, model_dir = create_empty_project(project_name="local") + + model_file = model_dir / "my_model.sql" + with open(model_file, "w", encoding="utf-8") as f: + f.write("SELECT 1 as id, 'value1' as status") + + # Create schema.yml with: + # 1. Same test on model and source, both with/without custom test name + # 2. Same test on same model with different args, both with/without custom test name + # 3. 
Versioned model with tests (both built-in and custom named) + schema_yaml = { + "version": 2, + "sources": [ + { + "name": "raw", + "tables": [ + { + "name": "my_source", + "columns": [ + { + "name": "id", + "data_tests": [ + {"not_null": {"name": "custom_notnull_name"}}, + {"not_null": {}}, + ], + } + ], + } + ], + } + ], + "models": [ + { + "name": "my_model", + "columns": [ + { + "name": "id", + "data_tests": [ + {"not_null": {"name": "custom_notnull_name"}}, + {"not_null": {}}, + ], + }, + { + "name": "status", + "data_tests": [ + {"accepted_values": {"values": ["value1", "value2"]}}, + {"accepted_values": {"values": ["value1", "value2", "value3"]}}, + { + "accepted_values": { + "name": "custom_accepted_values_name", + "values": ["value1", "value2"], + } + }, + { + "accepted_values": { + "name": "custom_accepted_values_name", + "values": ["value1", "value2", "value3"], + } + }, + ], + }, + ], + }, + { + "name": "versioned_model", + "columns": [ + { + "name": "id", + "data_tests": [ + {"not_null": {}}, + {"not_null": {"name": "custom_versioned_notnull"}}, + ], + }, + { + "name": "amount", + "data_tests": [ + {"accepted_values": {"values": ["low", "high"]}}, + ], + }, + ], + "versions": [ + {"v": 1}, + {"v": 2}, + ], + }, + ], + } + + schema_file = model_dir / "schema.yml" + with open(schema_file, "w", encoding="utf-8") as f: + yaml.dump(schema_yaml, f) + + # Create versioned model files + versioned_model_v1_file = model_dir / "versioned_model_v1.sql" + with open(versioned_model_v1_file, "w", encoding="utf-8") as f: + f.write("SELECT 1 as id, 'low' as amount") + + versioned_model_v2_file = model_dir / "versioned_model_v2.sql" + with open(versioned_model_v2_file, "w", encoding="utf-8") as f: + f.write("SELECT 1 as id, 'low' as amount") + + context = Context(paths=project_dir) + + all_audit_names = list(context._audits.keys()) + list(context._standalone_audits.keys()) + assert sorted(all_audit_names) == [ + "local.accepted_values_my_model_status__value1__value2", + "local.accepted_values_my_model_status__value1__value2__value3", + "local.accepted_values_versioned_model_v1_amount__low__high", + "local.accepted_values_versioned_model_v2_amount__low__high", + "local.custom_accepted_values_name_my_model_status__value1__value2", + "local.custom_accepted_values_name_my_model_status__value1__value2__value3", + "local.custom_notnull_name_my_model_id", + "local.custom_versioned_notnull_versioned_model_v1_id", + "local.custom_versioned_notnull_versioned_model_v2_id", + "local.not_null_my_model_id", + "local.not_null_versioned_model_v1_id", + "local.not_null_versioned_model_v2_id", + "local.source_custom_notnull_name_raw_my_source_id", + "local.source_not_null_raw_my_source_id", + ] From 9fc6a2e378e67ea2f3b7a4859dda841c21a0ba59 Mon Sep 17 00:00:00 2001 From: Chris Rericha <67359577+crericha@users.noreply.github.com> Date: Mon, 6 Oct 2025 21:20:50 -0400 Subject: [PATCH 053/173] Fix: Add workaround to ignore source dependency if fqn matches model (#5492) --- sqlmesh/dbt/basemodel.py | 19 ++++++++++++++----- sqlmesh/dbt/context.py | 8 ++++++++ sqlmesh/dbt/source.py | 5 +++++ tests/dbt/test_transformation.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 5 deletions(-) diff --git a/sqlmesh/dbt/basemodel.py b/sqlmesh/dbt/basemodel.py index 3e325f13e6..7c7e9e2e76 100644 --- a/sqlmesh/dbt/basemodel.py +++ b/sqlmesh/dbt/basemodel.py @@ -130,7 +130,7 @@ class BaseModelConfig(GeneralConfig): unique_id: str = "" name: str = "" package_name: str = "" - fqn: t.List[str] = [] + 
fqn_: t.List[str] = Field(default_factory=list, alias="fqn") schema_: str = Field("", alias="schema") database: t.Optional[str] = None alias: t.Optional[str] = None @@ -281,15 +281,17 @@ def remove_tests_with_invalid_refs(self, context: DbtContext) -> None: and all(source in context.sources for source in test.dependencies.sources) ] + @property + def fqn(self) -> str: + return ".".join(self.fqn_) + @property def sqlmesh_config_fields(self) -> t.Set[str]: return {"description", "owner", "stamp", "storage_format"} @property def node_info(self) -> DbtNodeInfo: - return DbtNodeInfo( - unique_id=self.unique_id, name=self.name, fqn=".".join(self.fqn), alias=self.alias - ) + return DbtNodeInfo(unique_id=self.unique_id, name=self.name, fqn=self.fqn, alias=self.alias) def sqlmesh_model_kwargs( self, @@ -327,7 +329,14 @@ def sqlmesh_model_kwargs( "column_descriptions": column_descriptions_to_sqlmesh(self.columns) or None, "depends_on": { model.canonical_name(context) for model in model_context.refs.values() - }.union({source.canonical_name(context) for source in model_context.sources.values()}), + }.union( + { + source.canonical_name(context) + for source in model_context.sources.values() + if source.fqn not in context.model_fqns + # Allow dbt projects to reference a model as a source without causing a cycle + }, + ), "jinja_macros": jinja_macros, "path": self.path, "pre_statements": [d.jinja_statement(hook.sql) for hook in self.pre_hook], diff --git a/sqlmesh/dbt/context.py b/sqlmesh/dbt/context.py index 67e70d3c79..bcdae8f97a 100644 --- a/sqlmesh/dbt/context.py +++ b/sqlmesh/dbt/context.py @@ -51,6 +51,7 @@ class DbtContext: _project_name: t.Optional[str] = None _variables: t.Dict[str, t.Any] = field(default_factory=dict) _models: t.Dict[str, ModelConfig] = field(default_factory=dict) + _model_fqns: t.Set[str] = field(default_factory=set) _seeds: t.Dict[str, SeedConfig] = field(default_factory=dict) _sources: t.Dict[str, SourceConfig] = field(default_factory=dict) _refs: t.Dict[str, t.Union[ModelConfig, SeedConfig]] = field(default_factory=dict) @@ -144,6 +145,7 @@ def models(self) -> t.Dict[str, ModelConfig]: def models(self, models: t.Dict[str, ModelConfig]) -> None: self._models = {} self._refs = {} + self._model_fqns = set() self.add_models(models) def add_models(self, models: t.Dict[str, ModelConfig]) -> None: @@ -151,6 +153,12 @@ def add_models(self, models: t.Dict[str, ModelConfig]) -> None: self._models.update(models) self._jinja_environment = None + @property + def model_fqns(self) -> t.Set[str]: + if not self._model_fqns: + self._model_fqns = {model.fqn for model in self._models.values()} + return self._model_fqns + @property def seeds(self) -> t.Dict[str, SeedConfig]: return self._seeds diff --git a/sqlmesh/dbt/source.py b/sqlmesh/dbt/source.py index 76ee682e77..efafbf1642 100644 --- a/sqlmesh/dbt/source.py +++ b/sqlmesh/dbt/source.py @@ -36,6 +36,7 @@ class SourceConfig(GeneralConfig): # DBT configuration fields name: str = "" source_name_: str = Field("", alias="source_name") + fqn_: t.List[str] = Field(default_factory=list, alias="fqn") database: t.Optional[str] = None schema_: t.Optional[str] = Field(None, alias="schema") identifier: t.Optional[str] = None @@ -64,6 +65,10 @@ def table_name(self) -> t.Optional[str]: def config_name(self) -> str: return f"{self.source_name_}.{self.name}" + @property + def fqn(self) -> str: + return ".".join(self.fqn_) + def canonical_name(self, context: DbtContext) -> str: if self._canonical_name is None: source = context.get_callable_macro("source") 
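
The rule implemented by the changes above, in one sentence: when building a model's `depends_on`, a source whose fully-qualified name collides with a model in the project is skipped, since such a "source" is really the model itself referenced via `source()` and would otherwise introduce a cycle. A minimal illustrative sketch of that rule (the `Node` class and `source_dependencies` helper are simplified stand-ins, not the real `SourceConfig`/`ModelConfig` types):

    from dataclasses import dataclass, field
    import typing as t

    @dataclass
    class Node:
        name: str
        fqn_: t.List[str] = field(default_factory=list)

        @property
        def fqn(self) -> str:
            # Mirrors the new `fqn` properties above: dot-join the fqn components.
            return ".".join(self.fqn_)

    def source_dependencies(sources: t.List[Node], models: t.List[Node]) -> t.Set[str]:
        # Drop any source whose fqn matches a model fqn to avoid a self-referential cycle.
        model_fqns = {m.fqn for m in models}
        return {s.name for s in sources if s.fqn not in model_fqns}

    sources = [
        Node("source_a", ["package", "schema", "model_a"]),
        Node("source_b", ["package", "schema", "source_b"]),
    ]
    models = [Node("model_a", ["package", "schema", "model_a"])]
    assert source_dependencies(sources, models) == {"source_b"}
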
diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py index 141c160e7e..0a1091a7fc 100644 --- a/tests/dbt/test_transformation.py +++ b/tests/dbt/test_transformation.py @@ -53,6 +53,7 @@ ) from sqlmesh.dbt.context import DbtContext from sqlmesh.dbt.model import Materialization, ModelConfig +from sqlmesh.dbt.source import SourceConfig from sqlmesh.dbt.project import Project from sqlmesh.dbt.relation import Policy from sqlmesh.dbt.seed import SeedConfig @@ -2678,3 +2679,31 @@ def test_selected_resources_context_variable( result = context.render(test_condition, selected_resources=selected_resources) assert result.strip() == "has_resources" + + +def test_ignore_source_depends_on_when_also_model(dbt_dummy_postgres_config: PostgresConfig): + context = DbtContext() + context._target = dbt_dummy_postgres_config + + source_a = SourceConfig( + name="source_a", + fqn=["package", "schema", "model_a"], + ) + source_a._canonical_name = "schema.source_a" + source_b = SourceConfig( + name="source_b", + fqn=["package", "schema", "source_b"], + ) + source_b._canonical_name = "schema.source_b" + context.sources = {"source_a": source_a, "source_b": source_b} + + model = ModelConfig( + dependencies=Dependencies(sources={"source_a", "source_b"}), + fqn=["package", "schema", "test_model"], + ) + context.models = { + "test_model": model, + "model_a": ModelConfig(name="model_a", fqn=["package", "schema", "model_a"]), + } + + assert model.sqlmesh_model_kwargs(context)["depends_on"] == {"schema.source_b"} From 8e9fe231c5624cc8ea56910e9fa535fa9fef0fac Mon Sep 17 00:00:00 2001 From: Max Mykal Date: Mon, 6 Oct 2025 20:02:29 -0700 Subject: [PATCH 054/173] fix(web_common): lineage styling and border colors (#5494) --- .../LineageColumnLevel/FactoryColumn.tsx | 35 +++++++++++++++---- .../Lineage/LineageControlButton.tsx | 2 +- .../components/Lineage/LineageLayoutBase.tsx | 16 +-------- .../src/components/Lineage/node/NodePort.tsx | 2 +- .../Lineage/stories/ModelLineage.tsx | 2 +- web/common/tailwind.lineage.config.js | 9 ++++- 6 files changed, 40 insertions(+), 26 deletions(-) diff --git a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx index 19b73c3ef6..90def0f5ea 100644 --- a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx +++ b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx @@ -202,10 +202,22 @@ export function FactoryColumn< className="FactoryColumn__Information" info={description} > - + ) : ( - + )} @@ -214,7 +226,7 @@ export function FactoryColumn< {type} } className={cn( - 'FactoryColumn__Metadata relative overflow-visible group p-0', + 'FactoryColumn__Metadata relative overflow-visible group', isDisabledColumn && 'cursor-not-allowed', className, )} @@ -240,8 +252,8 @@ export function FactoryColumn< id={id} nodeId={nodeId} className={cn( - 'border-t border-lineage-divider first:border-t-0 px-2', - isTriggeredColumn && 'bg-lineage-model-column-active', + 'border-t border-lineage-divider first:border-t-0', + isTriggeredColumn && 'bg-lineage-model-column-active-background', )} > {renderColumn()} @@ -252,11 +264,20 @@ export function FactoryColumn< }) } -function DisplayColumName({ name }: { name: string }) { +function DisplayColumName({ + name, + className, +}: { + name: string + className?: string +}) { return ( {name} diff --git a/web/common/src/components/Lineage/LineageControlButton.tsx b/web/common/src/components/Lineage/LineageControlButton.tsx 
index 6f66f90db7..d3f3d5d215 100644 --- a/web/common/src/components/Lineage/LineageControlButton.tsx +++ b/web/common/src/components/Lineage/LineageControlButton.tsx @@ -21,7 +21,7 @@ export function LineageControlButton({ side="left" sideOffset={8} delayDuration={0} - className="px-2 py-1 text-xs rounded-sm font-semibold bg-lineage-control-button-tooltip-background text-lineage-control-button-tooltip-foreground" + className="px-2 py-1 text-xs rounded-sm font-semibold bg-lineage-control-button-tooltip-background text-lineage-control-button-tooltip-foreground border-2 border-lineage-control-button-tooltip-border" trigger={
{ - if (currentNode?.id) { - setSelectedNodeId(currentNode.id) - } else { - const node = nodes.length > 0 ? nodes[nodes.length - 1] : null - - if (node) { - setCenter(node.position.x, node.position.y, { - zoom: zoom, - duration: 0, - }) - } - } - }, [currentNode?.id, setSelectedNodeId, setCenter]) - return ( , @@ -327,6 +312,7 @@ export function LineageLayoutBase< showInteractive={false} showFitView={false} position="top-right" + className="m-1 border-2 border-lineage-control-border rounded-sm overflow-hidden" > {currentNode && ( () => cleanupLayoutWorker(), []) diff --git a/web/common/tailwind.lineage.config.js b/web/common/tailwind.lineage.config.js index c2c8800a6f..b615ea756f 100644 --- a/web/common/tailwind.lineage.config.js +++ b/web/common/tailwind.lineage.config.js @@ -8,6 +8,7 @@ export default { divider: 'var(--color-lineage-divider)', border: 'var(--color-lineage-border)', control: { + border: 'var(--color-lineage-control-border)', background: { DEFAULT: 'var(--color-lineage-control-background)', hover: 'var(--color-lineage-control-background-hover)', @@ -18,6 +19,7 @@ export default { }, button: { tooltip: { + border: 'var(--color-lineage-control-button-tooltip-border)', background: 'var(--color-lineage-control-button-tooltip-background)', foreground: @@ -68,6 +70,12 @@ export default { }, model: { column: { + active: { + background: + 'var(--color-lineage-model-column-active-background)', + foreground: + 'var(--color-lineage-model-column-active-foreground)', + }, source: { background: 'var(--color-lineage-model-column-source-background)', @@ -81,7 +89,6 @@ export default { 'var(--color-lineage-model-column-error-background)', icon: 'var(--color-lineage-model-column-error-icon)', }, - active: 'var(--color-lineage-model-column-active)', icon: { DEFAULT: 'var(--color-lineage-model-column-icon)', active: 'var(--color-lineage-model-column-icon-active)', From 5ffed107fca598816073d4f916da67db78cf7aae Mon Sep 17 00:00:00 2001 From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com> Date: Tue, 7 Oct 2025 10:18:12 +0300 Subject: [PATCH 055/173] Feat(dbt): Add support for transaction in dbt pre and post hooks (#5480) --- sqlmesh/core/model/common.py | 1 + sqlmesh/core/model/definition.py | 29 ++- sqlmesh/core/snapshot/evaluator.py | 67 +++++-- sqlmesh/dbt/basemodel.py | 13 +- tests/core/test_snapshot_evaluator.py | 20 +- tests/dbt/test_transformation.py | 177 ++++++++++++++++++ .../dbt/sushi_test/macros/insert_hook.sql | 14 ++ .../models/model_with_transaction_hooks.sql | 56 ++++++ 8 files changed, 347 insertions(+), 30 deletions(-) create mode 100644 tests/fixtures/dbt/sushi_test/macros/insert_hook.sql create mode 100644 tests/fixtures/dbt/sushi_test/models/model_with_transaction_hooks.sql diff --git a/sqlmesh/core/model/common.py b/sqlmesh/core/model/common.py index d2b9a11c08..9e117b56fb 100644 --- a/sqlmesh/core/model/common.py +++ b/sqlmesh/core/model/common.py @@ -663,6 +663,7 @@ def parse_strings_with_macro_refs(value: t.Any, dialect: DialectType) -> t.Any: class ParsableSql(PydanticModel): sql: str + transaction: t.Optional[bool] = None _parsed: t.Optional[exp.Expression] = None _parsed_dialect: t.Optional[str] = None diff --git a/sqlmesh/core/model/definition.py b/sqlmesh/core/model/definition.py index f81dae004b..0a20ab23b2 100644 --- a/sqlmesh/core/model/definition.py +++ b/sqlmesh/core/model/definition.py @@ -363,6 +363,7 @@ def render_pre_statements( expand: t.Iterable[str] = tuple(), deployability_index: t.Optional[DeployabilityIndex] = None, 
engine_adapter: t.Optional[EngineAdapter] = None,
+        inside_transaction: t.Optional[bool] = True,
         **kwargs: t.Any,
     ) -> t.List[exp.Expression]:
         """Renders pre-statements for a model.
@@ -384,7 +385,11 @@
             The list of rendered expressions.
         """
         return self._render_statements(
-            self.pre_statements,
+            [
+                stmt
+                for stmt in self.pre_statements
+                if stmt.args.get("transaction", True) == inside_transaction
+            ],
             start=start,
             end=end,
             execution_time=execution_time,
@@ -405,6 +410,7 @@
         expand: t.Iterable[str] = tuple(),
         deployability_index: t.Optional[DeployabilityIndex] = None,
         engine_adapter: t.Optional[EngineAdapter] = None,
+        inside_transaction: t.Optional[bool] = True,
         **kwargs: t.Any,
     ) -> t.List[exp.Expression]:
         """Renders post-statements for a model.
@@ -420,13 +426,18 @@
                 that depend on materialized tables. Model definitions are inlined and can thus be run end to
                 end on the fly.
             deployability_index: Determines snapshots that are deployable in the context of this render.
+            inside_transaction: Whether to render hooks with transaction=True (inside) or transaction=False (outside).
             kwargs: Additional kwargs to pass to the renderer.

         Returns:
             The list of rendered expressions.
         """
         return self._render_statements(
-            self.post_statements,
+            [
+                stmt
+                for stmt in self.post_statements
+                if stmt.args.get("transaction", True) == inside_transaction
+            ],
             start=start,
             end=end,
             execution_time=execution_time,
@@ -567,6 +578,8 @@ def _get_parsed_statements(self, attr_name: str) -> t.List[exp.Expression]:
         result = []
         for v in value:
             parsed = v.parse(self.dialect)
+            if getattr(v, "transaction", None) is not None:
+                parsed.set("transaction", v.transaction)
             if not isinstance(parsed, exp.Semicolon):
                 result.append(parsed)
         return result
@@ -2592,9 +2605,17 @@ def _create_model(
         if statement_field in kwargs:
             # Macros extracted from these statements need to be treated as metadata only
             is_metadata = statement_field == "on_virtual_update"
-            statements.extend((stmt, is_metadata) for stmt in kwargs[statement_field])
+            for stmt in kwargs[statement_field]:
+                # Extract the expression if it's already a ParsableSql
+                expr = stmt.parse(dialect) if isinstance(stmt, ParsableSql) else stmt
+                statements.append((expr, is_metadata))
             kwargs[statement_field] = [
+                # keep ParsableSql instances as-is to retain the transaction information
+                stmt
+                if isinstance(stmt, ParsableSql)
+                else ParsableSql.from_parsed_expression(
+                    stmt, dialect, use_meta_sql=use_original_sql
+                )
                 for stmt in kwargs[statement_field]
             ]

diff --git a/sqlmesh/core/snapshot/evaluator.py b/sqlmesh/core/snapshot/evaluator.py
index 2676709d85..773010d673 100644
--- a/sqlmesh/core/snapshot/evaluator.py
+++ b/sqlmesh/core/snapshot/evaluator.py
@@ -750,13 +750,19 @@ def _evaluate_snapshot(
         **render_statements_kwargs
     )

+    evaluation_strategy = _evaluation_strategy(snapshot, adapter)
+    evaluation_strategy.run_pre_statements(
+        snapshot=snapshot,
+        render_kwargs={**render_statements_kwargs, "inside_transaction": False},
+    )
+
     with (
         adapter.transaction(),
         adapter.session(snapshot.model.render_session_properties(**render_statements_kwargs)),
    ):
-        evaluation_strategy = _evaluation_strategy(snapshot, adapter)
         evaluation_strategy.run_pre_statements(
-            snapshot=snapshot, render_kwargs=render_statements_kwargs
+            snapshot=snapshot,
+            render_kwargs={**render_statements_kwargs, "inside_transaction": True},
         )

         if not target_table_exists or (model.is_seed and
not snapshot.intervals): @@ -828,10 +834,16 @@ def _evaluate_snapshot( ) evaluation_strategy.run_post_statements( - snapshot=snapshot, render_kwargs=render_statements_kwargs + snapshot=snapshot, + render_kwargs={**render_statements_kwargs, "inside_transaction": True}, ) - return wap_id + evaluation_strategy.run_post_statements( + snapshot=snapshot, + render_kwargs={**render_statements_kwargs, "inside_transaction": False}, + ) + + return wap_id def create_snapshot( self, @@ -865,6 +877,11 @@ def create_snapshot( deployability_index=deployability_index, ) + evaluation_strategy = _evaluation_strategy(snapshot, adapter) + evaluation_strategy.run_pre_statements( + snapshot=snapshot, render_kwargs={**create_render_kwargs, "inside_transaction": False} + ) + with ( adapter.transaction(), adapter.session(snapshot.model.render_session_properties(**create_render_kwargs)), @@ -896,6 +913,10 @@ def create_snapshot( dry_run=True, ) + evaluation_strategy.run_post_statements( + snapshot=snapshot, render_kwargs={**create_render_kwargs, "inside_transaction": False} + ) + if on_complete is not None: on_complete(snapshot) @@ -1097,6 +1118,11 @@ def _migrate_snapshot( ) target_table_name = snapshot.table_name() + evaluation_strategy = _evaluation_strategy(snapshot, adapter) + evaluation_strategy.run_pre_statements( + snapshot=snapshot, render_kwargs={**render_kwargs, "inside_transaction": False} + ) + with ( adapter.transaction(), adapter.session(snapshot.model.render_session_properties(**render_kwargs)), @@ -1134,6 +1160,10 @@ def _migrate_snapshot( dry_run=True, ) + evaluation_strategy.run_post_statements( + snapshot=snapshot, render_kwargs={**render_kwargs, "inside_transaction": False} + ) + # Retry in case when the table is migrated concurrently from another plan application @retry( reraise=True, @@ -1454,7 +1484,8 @@ def _execute_create( } if run_pre_post_statements: evaluation_strategy.run_pre_statements( - snapshot=snapshot, render_kwargs=create_render_kwargs + snapshot=snapshot, + render_kwargs={**create_render_kwargs, "inside_transaction": True}, ) evaluation_strategy.create( table_name=table_name, @@ -1471,7 +1502,8 @@ def _execute_create( ) if run_pre_post_statements: evaluation_strategy.run_post_statements( - snapshot=snapshot, render_kwargs=create_render_kwargs + snapshot=snapshot, + render_kwargs={**create_render_kwargs, "inside_transaction": True}, ) def _can_clone(self, snapshot: Snapshot, deployability_index: DeployabilityIndex) -> bool: @@ -2944,12 +2976,20 @@ def append( ) def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: - # in dbt custom materialisations it's up to the user when to run the pre hooks - pass + # in dbt custom materialisations it's up to the user to run the pre hooks inside the transaction + if not render_kwargs.get("inside_transaction", True): + super().run_pre_statements( + snapshot=snapshot, + render_kwargs=render_kwargs, + ) def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: - # in dbt custom materialisations it's up to the user when to run the post hooks - pass + # in dbt custom materialisations it's up to the user to run the post hooks inside the transaction + if not render_kwargs.get("inside_transaction", True): + super().run_post_statements( + snapshot=snapshot, + render_kwargs=render_kwargs, + ) def _execute_materialization( self, @@ -2985,14 +3025,15 @@ def _execute_materialization( "sql": str(query_or_df), "is_first_insert": is_first_insert, "create_only": create_only, - # FIXME: Add support for 
transaction=False "pre_hooks": [ - AttributeDict({"sql": s.this.this, "transaction": True}) + AttributeDict({"sql": s.this.this, "transaction": transaction}) for s in model.pre_statements + if (transaction := s.args.get("transaction", True)) ], "post_hooks": [ - AttributeDict({"sql": s.this.this, "transaction": True}) + AttributeDict({"sql": s.this.this, "transaction": transaction}) for s in model.post_statements + if (transaction := s.args.get("transaction", True)) ], "model_instance": model, **kwargs, diff --git a/sqlmesh/dbt/basemodel.py b/sqlmesh/dbt/basemodel.py index 7c7e9e2e76..0c719ebb88 100644 --- a/sqlmesh/dbt/basemodel.py +++ b/sqlmesh/dbt/basemodel.py @@ -13,6 +13,7 @@ from sqlmesh.core.config.base import UpdateStrategy from sqlmesh.core.config.common import VirtualEnvironmentMode from sqlmesh.core.model import Model +from sqlmesh.core.model.common import ParsableSql from sqlmesh.core.node import DbtNodeInfo from sqlmesh.dbt.column import ( ColumnConfig, @@ -87,7 +88,7 @@ class Hook(DbtConfig): """ sql: SqlStr - transaction: bool = True # TODO not yet supported + transaction: bool = True _sql_validator = sql_str_validator @@ -339,8 +340,14 @@ def sqlmesh_model_kwargs( ), "jinja_macros": jinja_macros, "path": self.path, - "pre_statements": [d.jinja_statement(hook.sql) for hook in self.pre_hook], - "post_statements": [d.jinja_statement(hook.sql) for hook in self.post_hook], + "pre_statements": [ + ParsableSql(sql=d.jinja_statement(hook.sql).sql(), transaction=hook.transaction) + for hook in self.pre_hook + ], + "post_statements": [ + ParsableSql(sql=d.jinja_statement(hook.sql).sql(), transaction=hook.transaction) + for hook in self.post_hook + ], "tags": self.tags, "physical_schema_mapping": context.sqlmesh_config.physical_schema_mapping, "default_catalog": context.target.database, diff --git a/tests/core/test_snapshot_evaluator.py b/tests/core/test_snapshot_evaluator.py index 68061544a8..c0a7a01b51 100644 --- a/tests/core/test_snapshot_evaluator.py +++ b/tests/core/test_snapshot_evaluator.py @@ -3232,11 +3232,11 @@ def test_create_post_statements_use_non_deployable_table( evaluator.create([snapshot], {}, DeployabilityIndex.none_deployable()) call_args = adapter_mock.execute.call_args_list - pre_calls = call_args[0][0][0] + pre_calls = call_args[1][0][0] assert len(pre_calls) == 1 assert pre_calls[0].sql(dialect="postgres") == expected_call - post_calls = call_args[1][0][0] + post_calls = call_args[2][0][0] assert len(post_calls) == 1 assert post_calls[0].sql(dialect="postgres") == expected_call @@ -3294,11 +3294,11 @@ def model_with_statements(context, **kwargs): expected_call = f'CREATE INDEX IF NOT EXISTS "idx" ON "sqlmesh__db"."db__test_model__{snapshot.version}__dev" /* db.test_model */("id")' call_args = adapter_mock.execute.call_args_list - pre_calls = call_args[0][0][0] + pre_calls = call_args[1][0][0] assert len(pre_calls) == 1 assert pre_calls[0].sql(dialect="postgres") == expected_call - post_calls = call_args[1][0][0] + post_calls = call_args[2][0][0] assert len(post_calls) == 1 assert post_calls[0].sql(dialect="postgres") == expected_call @@ -3356,14 +3356,14 @@ def create_log_table(evaluator, view_name): ) call_args = adapter_mock.execute.call_args_list - post_calls = call_args[1][0][0] + post_calls = call_args[2][0][0] assert len(post_calls) == 1 assert ( post_calls[0].sql(dialect="postgres") == f'CREATE INDEX IF NOT EXISTS "test_idx" ON "sqlmesh__test_schema"."test_schema__test_model__{snapshot.version}__dev" /* test_schema.test_model */("a")' ) - 
on_virtual_update_calls = call_args[2][0][0] + on_virtual_update_calls = call_args[4][0][0] assert ( on_virtual_update_calls[0].sql(dialect="postgres") == 'GRANT SELECT ON VIEW "test_schema__test_env"."test_model" /* test_schema.test_model */ TO ROLE "admin"' @@ -3441,7 +3441,7 @@ def model_with_statements(context, **kwargs): ) call_args = adapter_mock.execute.call_args_list - on_virtual_update_call = call_args[2][0][0][0] + on_virtual_update_call = call_args[4][0][0][0] assert ( on_virtual_update_call.sql(dialect="postgres") == 'CREATE INDEX IF NOT EXISTS "idx" ON "db"."test_model_3" /* db.test_model_3 */("id")' @@ -4187,11 +4187,11 @@ def test_multiple_engine_creation(snapshot: Snapshot, adapters, make_snapshot): assert view_args[1][0][0] == "test_schema__test_env.test_model" call_args = engine_adapters["secondary"].execute.call_args_list - pre_calls = call_args[0][0][0] + pre_calls = call_args[1][0][0] assert len(pre_calls) == 1 assert pre_calls[0].sql(dialect="postgres") == expected_call - post_calls = call_args[1][0][0] + post_calls = call_args[2][0][0] assert len(post_calls) == 1 assert post_calls[0].sql(dialect="postgres") == expected_call @@ -4459,7 +4459,7 @@ def model_with_statements(context, **kwargs): # For the pre/post statements verify the model-specific gateway was used engine_adapters["default"].execute.assert_called_once() - assert len(engine_adapters["secondary"].execute.call_args_list) == 2 + assert len(engine_adapters["secondary"].execute.call_args_list) == 4 # Validate that the get_catalog_type method was called only on the secondary engine from the macro evaluator engine_adapters["default"].get_catalog_type.assert_not_called() diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py index 0a1091a7fc..dd69f46200 100644 --- a/tests/dbt/test_transformation.py +++ b/tests/dbt/test_transformation.py @@ -2707,3 +2707,180 @@ def test_ignore_source_depends_on_when_also_model(dbt_dummy_postgres_config: Pos } assert model.sqlmesh_model_kwargs(context)["depends_on"] == {"schema.source_b"} + + +@pytest.mark.xdist_group("dbt_manifest") +def test_dbt_hooks_with_transaction_flag(sushi_test_dbt_context: Context): + model_fqn = '"memory"."sushi"."model_with_transaction_hooks"' + assert model_fqn in sushi_test_dbt_context.models + + model = sushi_test_dbt_context.models[model_fqn] + + pre_statements = model.pre_statements_ + assert pre_statements is not None + assert len(pre_statements) >= 3 + + # we need to check the expected SQL but more importantly that the transaction flags are there + assert any( + s.sql == 'JINJA_STATEMENT_BEGIN;\n{{ log("pre-hook") }}\nJINJA_END;' + and s.transaction is True + for s in pre_statements + ) + assert any( + "CREATE TABLE IF NOT EXISTS hook_outside_pre_table" in s.sql and s.transaction is False + for s in pre_statements + ) + assert any( + "CREATE TABLE IF NOT EXISTS shared_hook_table" in s.sql and s.transaction is False + for s in pre_statements + ) + assert any( + "{{ insert_into_shared_hook_table('inside_pre') }}" in s.sql and s.transaction is True + for s in pre_statements + ) + + post_statements = model.post_statements_ + assert post_statements is not None + assert len(post_statements) >= 4 + assert any( + s.sql == 'JINJA_STATEMENT_BEGIN;\n{{ log("post-hook") }}\nJINJA_END;' + and s.transaction is True + for s in post_statements + ) + assert any( + "{{ insert_into_shared_hook_table('inside_post') }}" in s.sql and s.transaction is True + for s in post_statements + ) + assert any( + "CREATE TABLE IF NOT EXISTS 
hook_outside_post_table" in s.sql and s.transaction is False
+        for s in post_statements
+    )
+    assert any(
+        "{{ insert_into_shared_hook_table('after_commit') }}" in s.sql and s.transaction is False
+        for s in post_statements
+    )
+
+    # render_pre_statements with inside_transaction=True should only return the insert statement
+    inside_pre_statements = model.render_pre_statements(inside_transaction=True)
+    assert len(inside_pre_statements) == 1
+    assert (
+        inside_pre_statements[0].sql()
+        == """INSERT INTO "shared_hook_table" ("id", "hook_name", "execution_order", "created_at") VALUES ((SELECT COALESCE(MAX("id"), 0) + 1 FROM "shared_hook_table"), 'inside_pre', (SELECT COALESCE(MAX("id"), 0) + 1 FROM "shared_hook_table"), NOW())"""
+    )
+
+    # while render_pre_statements with inside_transaction=False should only return the create statements
+    outside_pre_statements = model.render_pre_statements(inside_transaction=False)
+    assert len(outside_pre_statements) == 2
+    assert "CREATE" in outside_pre_statements[0].sql()
+    assert "hook_outside_pre_table" in outside_pre_statements[0].sql()
+    assert "CREATE" in outside_pre_statements[1].sql()
+    assert "shared_hook_table" in outside_pre_statements[1].sql()
+
+    # similarly for post statements
+    inside_post_statements = model.render_post_statements(inside_transaction=True)
+    assert len(inside_post_statements) == 1
+    assert (
+        inside_post_statements[0].sql()
+        == """INSERT INTO "shared_hook_table" ("id", "hook_name", "execution_order", "created_at") VALUES ((SELECT COALESCE(MAX("id"), 0) + 1 FROM "shared_hook_table"), 'inside_post', (SELECT COALESCE(MAX("id"), 0) + 1 FROM "shared_hook_table"), NOW())"""
+    )
+
+    outside_post_statements = model.render_post_statements(inside_transaction=False)
+    assert len(outside_post_statements) == 2
+    assert "CREATE" in outside_post_statements[0].sql()
+    assert "hook_outside_post_table" in outside_post_statements[0].sql()
+    assert "INSERT" in outside_post_statements[1].sql()
+    assert "shared_hook_table" in outside_post_statements[1].sql()
+
+
+@pytest.mark.xdist_group("dbt_manifest")
+def test_dbt_hooks_with_transaction_flag_execution(sushi_test_dbt_context: Context):
+    model_fqn = '"memory"."sushi"."model_with_transaction_hooks"'
+    assert model_fqn in sushi_test_dbt_context.models
+
+    plan = sushi_test_dbt_context.plan(select_models=["sushi.model_with_transaction_hooks"])
+    sushi_test_dbt_context.apply(plan)
+
+    result = sushi_test_dbt_context.engine_adapter.fetchdf(
+        "SELECT * FROM sushi.model_with_transaction_hooks"
+    )
+    assert len(result) == 1
+    assert result["id"][0] == 1
+    assert result["name"][0] == "test"
+
+    # ensure the outside pre-hook and post-hook tables were created
+    pre_outside = sushi_test_dbt_context.engine_adapter.fetchdf(
+        "SELECT * FROM hook_outside_pre_table"
+    )
+    assert len(pre_outside) == 1
+    assert pre_outside["id"][0] == 1
+    assert pre_outside["location"][0] == "outside"
+    assert pre_outside["execution_order"][0] == 1
+
+    post_outside = sushi_test_dbt_context.engine_adapter.fetchdf(
+        "SELECT * FROM hook_outside_post_table"
+    )
+    assert len(post_outside) == 1
+    assert post_outside["id"][0] == 5
+    assert post_outside["location"][0] == "outside"
+    assert post_outside["execution_order"][0] == 5
+
+    # verify the shared table that was created by before_begin and populated by all hooks
+    shared_table = sushi_test_dbt_context.engine_adapter.fetchdf(
+        "SELECT * FROM shared_hook_table ORDER BY execution_order"
+    )
+    assert len(shared_table) == 3
+    assert shared_table["execution_order"].is_monotonic_increasing
+
+    # The order of
creation and insertion will verify the following order of execution + # 1. before_begin (transaction=false) ran BEFORE the transaction started and created the table + # 2. inside_pre (transaction=true) ran INSIDE the transaction and could insert into the table + # 3. inside_post (transaction=true) ran INSIDE the transaction and could insert into the table (but after pre statement) + # 4. after_commit (transaction=false) ran AFTER the transaction committed + + assert shared_table["id"][0] == 1 + assert shared_table["hook_name"][0] == "inside_pre" + assert shared_table["execution_order"][0] == 1 + + assert shared_table["id"][1] == 2 + assert shared_table["hook_name"][1] == "inside_post" + assert shared_table["execution_order"][1] == 2 + + assert shared_table["id"][2] == 3 + assert shared_table["hook_name"][2] == "after_commit" + assert shared_table["execution_order"][2] == 3 + + # the timestamps also should be monotonically increasing for the same reason + for i in range(len(shared_table) - 1): + assert shared_table["created_at"][i] <= shared_table["created_at"][i + 1] + + # the tables using the alternate syntax should have correct order as well + assert pre_outside["created_at"][0] < shared_table["created_at"][0] + assert post_outside["created_at"][0] > shared_table["created_at"][1] + + # running with execution time one day in the future to simulate a run + tomorrow = datetime.now() + timedelta(days=1) + sushi_test_dbt_context.run( + select_models=["sushi.model_with_transaction_hooks"], execution_time=tomorrow + ) + + # to verify that the transaction information persists in state and is respected + shared_table = sushi_test_dbt_context.engine_adapter.fetchdf( + "SELECT * FROM shared_hook_table ORDER BY execution_order" + ) + + # and the execution order for run is similar + assert shared_table["execution_order"].is_monotonic_increasing + assert shared_table["id"][3] == 4 + assert shared_table["hook_name"][3] == "inside_pre" + assert shared_table["execution_order"][3] == 4 + + assert shared_table["id"][4] == 5 + assert shared_table["hook_name"][4] == "inside_post" + assert shared_table["execution_order"][4] == 5 + + assert shared_table["id"][5] == 6 + assert shared_table["hook_name"][5] == "after_commit" + assert shared_table["execution_order"][5] == 6 + + for i in range(len(shared_table) - 1): + assert shared_table["created_at"][i] <= shared_table["created_at"][i + 1] diff --git a/tests/fixtures/dbt/sushi_test/macros/insert_hook.sql b/tests/fixtures/dbt/sushi_test/macros/insert_hook.sql new file mode 100644 index 0000000000..aa27a7fe6d --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/macros/insert_hook.sql @@ -0,0 +1,14 @@ +{% macro insert_into_shared_hook_table(hook_name) %} +INSERT INTO shared_hook_table ( + id, + hook_name, + execution_order, + created_at +) +VALUES ( + (SELECT COALESCE(MAX(id), 0) + 1 FROM shared_hook_table), + '{{ hook_name }}', + (SELECT COALESCE(MAX(id), 0) + 1 FROM shared_hook_table), + NOW() +) +{% endmacro %} diff --git a/tests/fixtures/dbt/sushi_test/models/model_with_transaction_hooks.sql b/tests/fixtures/dbt/sushi_test/models/model_with_transaction_hooks.sql new file mode 100644 index 0000000000..49883f73df --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/models/model_with_transaction_hooks.sql @@ -0,0 +1,56 @@ +{{ + config( + materialized = 'table', + + pre_hook = [ + { + "sql": " + CREATE TABLE IF NOT EXISTS hook_outside_pre_table AS + SELECT + 1 AS id, + 'outside' AS location, + 1 AS execution_order, + NOW() AS created_at + ", + "transaction": false + }, + 
+ before_begin(" + CREATE TABLE IF NOT EXISTS shared_hook_table ( + id INT, + hook_name VARCHAR, + execution_order INT, + created_at TIMESTAMPTZ + ) + "), + + { + "sql": "{{ insert_into_shared_hook_table('inside_pre') }}", + "transaction": true + } + ], + + post_hook = [ + { + "sql": "{{ insert_into_shared_hook_table('inside_post') }}", + "transaction": true + }, + + { + "sql": " + CREATE TABLE IF NOT EXISTS hook_outside_post_table AS + SELECT + 5 AS id, + 'outside' AS location, + 5 AS execution_order, + NOW() AS created_at + ", + "transaction": false + }, + + after_commit("{{ insert_into_shared_hook_table('after_commit') }}") + ] + ) +}} + +SELECT 1 AS id, 'test' AS name; From db1faeb7fbc122e4f796adab7937b08ddd2e7416 Mon Sep 17 00:00:00 2001 From: etonlels Date: Tue, 7 Oct 2025 11:14:57 -0600 Subject: [PATCH 056/173] feat: add `batch_concurrency` to `ModelDefaultsConfig` (#5481) Co-authored-by: Claude --- sqlmesh/core/config/model.py | 2 + sqlmesh/core/model/kind.py | 12 +++++ tests/core/test_model.py | 101 +++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) diff --git a/sqlmesh/core/config/model.py b/sqlmesh/core/config/model.py index 5406a5497b..aeefdf2557 100644 --- a/sqlmesh/core/config/model.py +++ b/sqlmesh/core/config/model.py @@ -45,6 +45,7 @@ class ModelDefaultsConfig(BaseConfig): allow_partials: Whether the models can process partial (incomplete) data intervals. enabled: Whether the models are enabled. interval_unit: The temporal granularity of the models data intervals. By default computed from cron. + batch_concurrency: The maximum number of batches that can run concurrently for an incremental model. pre_statements: The list of SQL statements that get executed before a model runs. post_statements: The list of SQL statements that get executed before a model runs. on_virtual_update: The list of SQL statements to be executed after the virtual update. 
@@ -69,6 +70,7 @@ class ModelDefaultsConfig(BaseConfig): interval_unit: t.Optional[t.Union[str, IntervalUnit]] = None enabled: t.Optional[t.Union[str, bool]] = None formatting: t.Optional[t.Union[str, bool]] = None + batch_concurrency: t.Optional[int] = None pre_statements: t.Optional[t.List[t.Union[str, exp.Expression]]] = None post_statements: t.Optional[t.List[t.Union[str, exp.Expression]]] = None on_virtual_update: t.Optional[t.List[t.Union[str, exp.Expression]]] = None diff --git a/sqlmesh/core/model/kind.py b/sqlmesh/core/model/kind.py index cc4c6f0826..ad5197a73a 100644 --- a/sqlmesh/core/model/kind.py +++ b/sqlmesh/core/model/kind.py @@ -1105,6 +1105,18 @@ def create_model_kind(v: t.Any, dialect: str, defaults: t.Dict[str, t.Any]) -> M ): props[on_change_property] = defaults.get(on_change_property) + # only pass the batch_concurrency user default to models inheriting from _IncrementalBy + # that don't explicitly set it in the model definition, but ignore subclasses of _IncrementalBy + # that hardcode a specific batch_concurrency + if issubclass(kind_type, _IncrementalBy): + BATCH_CONCURRENCY: t.Final = "batch_concurrency" + if ( + props.get(BATCH_CONCURRENCY) is None + and defaults.get(BATCH_CONCURRENCY) is not None + and kind_type.all_field_infos()[BATCH_CONCURRENCY].default is None + ): + props[BATCH_CONCURRENCY] = defaults.get(BATCH_CONCURRENCY) + if kind_type == CustomKind: # load the custom materialization class and check if it uses a custom kind type from sqlmesh.core.snapshot.evaluator import get_custom_materialization_type diff --git a/tests/core/test_model.py b/tests/core/test_model.py index f1a9eeb0b9..f6fc448b79 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -7583,6 +7583,107 @@ def test_forward_only_on_destructive_change_config() -> None: assert context_model.on_destructive_change.is_allow +def test_batch_concurrency_config() -> None: + # No batch_concurrency default for incremental models + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + context = Context(config=config) + + expressions = d.parse( + """ + MODEL ( + name memory.db.table, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column c + ), + ); + SELECT a, b, c FROM source_table; + """ + ) + model = load_sql_based_model(expressions, defaults=config.model_defaults.dict()) + context.upsert_model(model) + context_model = context.get_model("memory.db.table") + assert context_model.batch_concurrency is None + + # batch_concurrency specified in model defaults applies to incremental models + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", batch_concurrency=5)) + context = Context(config=config) + + expressions = d.parse( + """ + MODEL ( + name memory.db.table, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column c + ), + ); + SELECT a, b, c FROM source_table; + """ + ) + model = load_sql_based_model(expressions, defaults=config.model_defaults.dict()) + context.upsert_model(model) + context_model = context.get_model("memory.db.table") + assert context_model.batch_concurrency == 5 + + # batch_concurrency specified in model definition overrides default + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", batch_concurrency=5)) + context = Context(config=config) + + expressions = d.parse( + """ + MODEL ( + name memory.db.table, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column c, + batch_concurrency 10 + ), + ); + SELECT a, b, c FROM source_table; + """ + ) + model = load_sql_based_model(expressions, defaults=config.model_defaults.dict()) + 
context.upsert_model(model)
+    context_model = context.get_model("memory.db.table")
+    assert context_model.batch_concurrency == 10
+
+    # batch_concurrency default does not apply to non-incremental models
+    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", batch_concurrency=5))
+    context = Context(config=config)
+
+    expressions = d.parse(
+        """
+        MODEL (
+            name memory.db.table,
+            kind FULL,
+        );
+        SELECT a, b, c FROM source_table;
+        """
+    )
+    model = load_sql_based_model(expressions, defaults=config.model_defaults.dict())
+    context.upsert_model(model)
+    context_model = context.get_model("memory.db.table")
+    assert context_model.batch_concurrency is None
+
+    # batch_concurrency default does not apply to INCREMENTAL_BY_UNIQUE_KEY models
+    config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", batch_concurrency=5))
+    context = Context(config=config)
+
+    expressions = d.parse(
+        """
+        MODEL (
+            name memory.db.table,
+            kind INCREMENTAL_BY_UNIQUE_KEY (
+                unique_key a
+            ),
+        );
+        SELECT a, b, c FROM source_table;
+        """
+    )
+    model = load_sql_based_model(expressions, defaults=config.model_defaults.dict())
+    context.upsert_model(model)
+    context_model = context.get_model("memory.db.table")
+    assert context_model.batch_concurrency == 1
+
+
 def test_model_meta_on_additive_change_property() -> None:
     """Test that ModelMeta has on_additive_change property that works like on_destructive_change."""
     from sqlmesh.core.model.kind import IncrementalByTimeRangeKind, OnAdditiveChange

From 0d15b4bac1c4c8e4c43bb426685885c2b756455a Mon Sep 17 00:00:00 2001
From: Iaroslav Zeigerman
Date: Tue, 7 Oct 2025 11:27:34 -0700
Subject: [PATCH 057/173] Fix: Account for missing stdin / stdout when checking whether the runtime environment is interactive (#5500)

---
 sqlmesh/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sqlmesh/__init__.py b/sqlmesh/__init__.py
index 47e9bacce2..7712a41379 100644
--- a/sqlmesh/__init__.py
+++ b/sqlmesh/__init__.py
@@ -126,6 +126,8 @@ def is_cicd_environment() -> bool:


 def is_interactive_environment() -> bool:
+    if sys.stdin is None or sys.stdout is None:
+        return False
     return sys.stdin.isatty() and sys.stdout.isatty()


From d2dd2dfa9d73325657f4f9e2d22d7846705c4be0 Mon Sep 17 00:00:00 2001
From: Erin Drummond
Date: Wed, 8 Oct 2025 08:38:13 +1300
Subject: [PATCH 058/173] Chore: Fix windows tests (#5496)

---
 sqlmesh/utils/cache.py |  4 ++++
 tests/conftest.py      | 14 +++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/sqlmesh/utils/cache.py b/sqlmesh/utils/cache.py
index 4b557e43b6..e72c34f632 100644
--- a/sqlmesh/utils/cache.py
+++ b/sqlmesh/utils/cache.py
@@ -59,6 +59,10 @@ def __init__(self, path: Path, prefix: t.Optional[str] = None):
         threshold = to_datetime("1 week ago").timestamp()
         # delete all old cache files
         for file in self._path.glob("*"):
+            if IS_WINDOWS:
+                # the file.stat() call below will fail on windows if the file name is longer than 260 chars
+                file = fix_windows_path(file)
+
             if not file.stem.startswith(self._cache_version) or file.stat().st_atime < threshold:
                 file.unlink(missing_ok=True)

diff --git a/tests/conftest.py b/tests/conftest.py
index 7a61281ad0..955b50234c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -580,7 +580,19 @@ def _make_function(
             # shutil.copytree just doesnt work properly with the symlinks on Windows, regardless of the `symlinks` setting
             src = str(path.absolute())
             dst = str(temp_dir.absolute())
-            os.system(f"robocopy {src} {dst} /E /COPYALL")
+
+            # Robocopy flag reference:
https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/robocopy#copy-options + # /E: Copy subdirectories, including empty directories + # /COPY:D Copy "data" only. In particular, this avoids copying auditing information, which can throw + # an error like "ERROR : You do not have the Manage Auditing user right" + robocopy_cmd = f"robocopy {src} {dst} /E /COPY:D" + exit_code = os.system(robocopy_cmd) + + # exit code reference: https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/robocopy#exit-return-codes + if exit_code > 8: + raise Exception( + f"robocopy command: '{robocopy_cmd}' failed with exit code: {exit_code}" + ) # after copying, delete the files that would have been ignored for root, dirs, _ in os.walk(temp_dir): From 850fcd132b8c2ca28476611eb156b33c85b34da3 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Wed, 8 Oct 2025 08:44:29 +1300 Subject: [PATCH 059/173] Feat(sqlmesh_dbt): Support the --threads CLI option (#5493) --- sqlmesh/core/config/loader.py | 1 + sqlmesh/dbt/loader.py | 5 +++++ sqlmesh_dbt/cli.py | 14 +++++++++++--- sqlmesh_dbt/operations.py | 5 ++++- tests/dbt/cli/test_operations.py | 30 ++++++++++++++++++++++++++++++ tests/dbt/cli/test_run.py | 8 ++++++++ 6 files changed, 59 insertions(+), 4 deletions(-) diff --git a/sqlmesh/core/config/loader.py b/sqlmesh/core/config/loader.py index 75915800e6..2d202cb276 100644 --- a/sqlmesh/core/config/loader.py +++ b/sqlmesh/core/config/loader.py @@ -177,6 +177,7 @@ def load_config_from_paths( dbt_profile_name=kwargs.pop("profile", None), dbt_target_name=kwargs.pop("target", None), variables=variables, + threads=kwargs.pop("threads", None), ) if type(dbt_python_config) != config_type: dbt_python_config = convert_config_type(dbt_python_config, config_type) diff --git a/sqlmesh/dbt/loader.py b/sqlmesh/dbt/loader.py index eb117a3e40..39973776a8 100644 --- a/sqlmesh/dbt/loader.py +++ b/sqlmesh/dbt/loader.py @@ -49,6 +49,7 @@ def sqlmesh_config( dbt_profile_name: t.Optional[str] = None, dbt_target_name: t.Optional[str] = None, variables: t.Optional[t.Dict[str, t.Any]] = None, + threads: t.Optional[int] = None, register_comments: t.Optional[bool] = None, **kwargs: t.Any, ) -> Config: @@ -67,6 +68,10 @@ def sqlmesh_config( if not issubclass(loader, DbtLoader): raise ConfigError("The loader must be a DbtLoader.") + if threads is not None: + # the to_sqlmesh() function on TargetConfig maps self.threads -> concurrent_tasks + profile.target.threads = threads + return Config( loader=loader, model_defaults=model_defaults, diff --git a/sqlmesh_dbt/cli.py b/sqlmesh_dbt/cli.py index 83230de3fd..ec11e7730e 100644 --- a/sqlmesh_dbt/cli.py +++ b/sqlmesh_dbt/cli.py @@ -8,11 +8,13 @@ import functools -def _get_dbt_operations(ctx: click.Context, vars: t.Optional[t.Dict[str, t.Any]]) -> DbtOperations: +def _get_dbt_operations( + ctx: click.Context, vars: t.Optional[t.Dict[str, t.Any]], threads: t.Optional[int] = None +) -> DbtOperations: if not isinstance(ctx.obj, functools.partial): raise ValueError(f"Unexpected click context object: {type(ctx.obj)}") - dbt_operations = ctx.obj(vars=vars) + dbt_operations = ctx.obj(vars=vars, threads=threads) if not isinstance(dbt_operations, DbtOperations): raise ValueError(f"Unexpected dbt operations type: {type(dbt_operations)}") @@ -128,16 +130,22 @@ def dbt( @click.option( "--empty/--no-empty", default=False, help="If specified, limit input refs and sources" ) +@click.option( + "--threads", + type=int, + help="Specify number of threads to use while executing 
models. Overrides settings in profiles.yml.", +) @vars_option @click.pass_context def run( ctx: click.Context, vars: t.Optional[t.Dict[str, t.Any]], + threads: t.Optional[int], env: t.Optional[str] = None, **kwargs: t.Any, ) -> None: """Compile SQL and execute against the current target database.""" - _get_dbt_operations(ctx, vars).run(environment=env, **kwargs) + _get_dbt_operations(ctx, vars, threads).run(environment=env, **kwargs) @dbt.command(name="list") diff --git a/sqlmesh_dbt/operations.py b/sqlmesh_dbt/operations.py index 6e8b452b28..cb1ac217cc 100644 --- a/sqlmesh_dbt/operations.py +++ b/sqlmesh_dbt/operations.py @@ -235,6 +235,7 @@ def create( profile: t.Optional[str] = None, target: t.Optional[str] = None, vars: t.Optional[t.Dict[str, t.Any]] = None, + threads: t.Optional[int] = None, debug: bool = False, ) -> DbtOperations: with Progress(transient=True) as progress: @@ -265,7 +266,9 @@ def create( sqlmesh_context = Context( paths=[project_dir], - config_loader_kwargs=dict(profile=profile, target=target, variables=vars), + config_loader_kwargs=dict( + profile=profile, target=target, variables=vars, threads=threads + ), load=True, # DbtSelector selects based on dbt model fqn's rather than SQLMesh model names selector=DbtSelector, diff --git a/tests/dbt/cli/test_operations.py b/tests/dbt/cli/test_operations.py index b23c87882a..139336297c 100644 --- a/tests/dbt/cli/test_operations.py +++ b/tests/dbt/cli/test_operations.py @@ -333,3 +333,33 @@ def test_run_option_full_refresh_with_selector(jaffle_shop_duckdb: Path): assert not plan.empty_backfill assert not plan.skip_backfill assert plan.models_to_backfill == set(['"jaffle_shop"."main"."stg_customers"']) + + +def test_create_sets_concurrent_tasks_based_on_threads(create_empty_project: EmptyProjectCreator): + project_dir, _ = create_empty_project(project_name="test") + + # add a postgres target because duckdb overrides to concurrent_tasks=1 regardless of what gets specified + profiles_yml_file = project_dir / "profiles.yml" + profiles_yml = yaml.load(profiles_yml_file) + profiles_yml["test"]["outputs"]["postgres"] = { + "type": "postgres", + "host": "localhost", + "port": 5432, + "user": "postgres", + "password": "postgres", + "dbname": "test", + "schema": "test", + } + profiles_yml_file.write_text(yaml.dump(profiles_yml)) + + operations = create(project_dir=project_dir, target="postgres") + + assert operations.context.concurrent_tasks == 1 # 1 is the default + + operations = create(project_dir=project_dir, threads=16, target="postgres") + + assert operations.context.concurrent_tasks == 16 + assert all( + g.connection and g.connection.concurrent_tasks == 16 + for g in operations.context.config.gateways.values() + ) diff --git a/tests/dbt/cli/test_run.py b/tests/dbt/cli/test_run.py index 755553bb57..4fdb7a0cdb 100644 --- a/tests/dbt/cli/test_run.py +++ b/tests/dbt/cli/test_run.py @@ -83,3 +83,11 @@ def test_run_with_changes_and_full_refresh( ("foo", "bar", "changed"), ("baz", "bing", "changed"), ] + + +def test_run_with_threads(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): + result = invoke_cli(["run", "--threads", "4"]) + assert result.exit_code == 0 + assert not result.exception + + assert "Model batches executed" in result.output From 9f573b4ada368c3cd3b098170d9209e5a92d5ac8 Mon Sep 17 00:00:00 2001 From: David Dai Date: Tue, 7 Oct 2025 15:51:29 -0700 Subject: [PATCH 060/173] feat(experimental): add grants support for DBT custom materializations (#5489) --- sqlmesh/core/snapshot/evaluator.py | 14 ++++++ 
tests/dbt/test_custom_materializations.py | 56 +++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/sqlmesh/core/snapshot/evaluator.py b/sqlmesh/core/snapshot/evaluator.py index 773010d673..f7aea5cff1 100644 --- a/sqlmesh/core/snapshot/evaluator.py +++ b/sqlmesh/core/snapshot/evaluator.py @@ -2940,6 +2940,13 @@ def create( **kwargs, ) + # Apply grants after dbt custom materialization table creation + if not skip_grants: + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def insert( self, table_name: str, @@ -2958,6 +2965,13 @@ def insert( **kwargs, ) + # Apply grants after custom materialization insert (only on first insert) + if is_first_insert: + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def append( self, table_name: str, diff --git a/tests/dbt/test_custom_materializations.py b/tests/dbt/test_custom_materializations.py index 9e7a94315c..c1625d0251 100644 --- a/tests/dbt/test_custom_materializations.py +++ b/tests/dbt/test_custom_materializations.py @@ -7,6 +7,7 @@ from sqlmesh import Context from sqlmesh.core.config import ModelDefaultsConfig +from sqlmesh.core.engine_adapter import DuckDBEngineAdapter from sqlmesh.core.model.kind import DbtCustomKind from sqlmesh.dbt.context import DbtContext from sqlmesh.dbt.manifest import ManifestHelper @@ -719,3 +720,58 @@ def test_custom_materialization_lineage_tracking(copy_to_temp_path: t.Callable): # Dev and prod should have the same data as they share physical data assert dev_analytics_result["count"][0] == prod_analytics_result["count"][0] assert dev_analytics_result["unique_waiters"][0] == prod_analytics_result["unique_waiters"][0] + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_grants(copy_to_temp_path: t.Callable, mocker): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + grants_model_content = """ +{{ config( + materialized='custom_incremental', + grants={ + 'select': ['user1', 'user2'], + 'insert': ['writer'] + } +) }} + +SELECT + CURRENT_TIMESTAMP as created_at, + 1 as id, + 'grants_test' as test_type +""".strip() + + (models_dir / "test_grants_model.sql").write_text(grants_model_content) + + mocker.patch.object(DuckDBEngineAdapter, "SUPPORTS_GRANTS", True) + mocker.patch.object(DuckDBEngineAdapter, "_get_current_grants_config", return_value={}) + + sync_grants_calls = [] + + def mock_sync_grants(*args, **kwargs): + sync_grants_calls.append((args, kwargs)) + + mocker.patch.object(DuckDBEngineAdapter, "sync_grants_config", side_effect=mock_sync_grants) + + context = Context(paths=path) + + model = context.get_model("sushi.test_grants_model") + assert isinstance(model.kind, DbtCustomKind) + plan = context.plan(select_models=["sushi.test_grants_model"]) + context.apply(plan) + + assert len(sync_grants_calls) == 1 + args = sync_grants_calls[0][0] + assert args + + table = args[0] + grants_config = args[1] + assert table.sql(dialect="duckdb") == "memory.sushi.test_grants_model" + assert grants_config == { + "select": ["user1", "user2"], + "insert": ["writer"], + } From 7d7469037e03b917b047287501b4f01185262cf9 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Wed, 8 Oct 2025 13:29:32 +1300 Subject: [PATCH 061/173] Feat: Add 
config flag to infer the state schema per dbt target (#5485)

---
 sqlmesh/cli/project_init.py                   | 10 +++-
 sqlmesh/core/config/__init__.py               |  2 +-
 sqlmesh/core/config/dbt.py                    | 13 +++++
 sqlmesh/core/config/loader.py                 |  5 ++
 sqlmesh/core/config/root.py                   |  3 ++
 sqlmesh/dbt/loader.py                         | 26 ++++++++++
 tests/dbt/test_config.py                      | 38 +++++++++++++-
 tests/dbt/test_integration.py                 | 50 ++++++++++++++++++-
 tests/fixtures/dbt/empty_project/profiles.yml |  6 ++-
 9 files changed, 148 insertions(+), 5 deletions(-)
 create mode 100644 sqlmesh/core/config/dbt.py

diff --git a/sqlmesh/cli/project_init.py b/sqlmesh/cli/project_init.py
index 6b4f6c7a83..e3132a6de3 100644
--- a/sqlmesh/cli/project_init.py
+++ b/sqlmesh/cli/project_init.py
@@ -116,7 +116,15 @@ def _gen_config(
   - invalidselectstarexpansion
   - noambiguousprojections
 """,
-        ProjectTemplate.DBT: f"""# --- Virtual Data Environment Mode ---
+        ProjectTemplate.DBT: f"""# --- DBT-specific options ---
+dbt:
+  # This configuration ensures that each dbt target gets its own isolated state.
+  # The inferred state schemas are named "sqlmesh_state_<profile>_<schema>", eg "sqlmesh_state_jaffle_shop_dev"
+  # If this is undesirable, you may manually configure the gateway to use a specific state schema name
+  # https://sqlmesh.readthedocs.io/en/stable/integrations/dbt/#selecting-a-different-state-connection
+  infer_state_schema_name: True
+
+# --- Virtual Data Environment Mode ---
 # Enable Virtual Data Environments (VDE) for *development* environments.
 # Note that the production environment in dbt projects is not virtual by default to maintain compatibility with existing tooling.
 # https://sqlmesh.readthedocs.io/en/stable/guides/configuration/#virtual-data-environment-modes
diff --git a/sqlmesh/core/config/__init__.py b/sqlmesh/core/config/__init__.py
index 0dc99c0fd1..42ed82c6e6 100644
--- a/sqlmesh/core/config/__init__.py
+++ b/sqlmesh/core/config/__init__.py
@@ -36,6 +36,6 @@
 from sqlmesh.core.config.naming import NameInferenceConfig as NameInferenceConfig
 from sqlmesh.core.config.linter import LinterConfig as LinterConfig
 from sqlmesh.core.config.plan import PlanConfig as PlanConfig
-from sqlmesh.core.config.root import Config as Config
+from sqlmesh.core.config.root import Config as Config, DbtConfig as DbtConfig
 from sqlmesh.core.config.run import RunConfig as RunConfig
 from sqlmesh.core.config.scheduler import BuiltInSchedulerConfig as BuiltInSchedulerConfig
diff --git a/sqlmesh/core/config/dbt.py b/sqlmesh/core/config/dbt.py
new file mode 100644
index 0000000000..e3132c40a4
--- /dev/null
+++ b/sqlmesh/core/config/dbt.py
@@ -0,0 +1,13 @@
+from sqlmesh.core.config.base import BaseConfig
+
+
+class DbtConfig(BaseConfig):
+    """
+    Represents dbt-specific options on the SQLMesh root config.
+
+    These options are only taken into account for dbt projects and are ignored on native projects
+    """
+
+    infer_state_schema_name: bool = False
+    """If set, indicates to the dbt loader that the state schema should be inferred based on the profile/target
+    so that each target gets its own isolated state"""
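
To make the new flag concrete: when `infer_state_schema_name` is enabled, the dbt loader (see the `sqlmesh/dbt/loader.py` changes below) derives one state schema per profile and target schema. A minimal sketch of the naming rule, with `inferred_state_schema` as a hypothetical helper name (the real code also validates that the target defines a schema):

    def inferred_state_schema(profile_name: str, target_schema: str) -> str:
        # One isolated state schema per (profile, target schema) pair.
        return f"sqlmesh_state_{profile_name}_{target_schema}"

    assert inferred_state_schema("jaffle_shop", "main") == "sqlmesh_state_jaffle_shop_main"
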
diff --git a/sqlmesh/core/config/loader.py b/sqlmesh/core/config/loader.py
index 2d202cb276..e05c148b90 100644
--- a/sqlmesh/core/config/loader.py
+++ b/sqlmesh/core/config/loader.py
@@ -172,12 +172,17 @@ def load_config_from_paths(
     if dbt_project_file:
         from sqlmesh.dbt.loader import sqlmesh_config

+        infer_state_schema_name = False
+        if dbt := non_python_config.dbt:
+            infer_state_schema_name = dbt.infer_state_schema_name
+
         dbt_python_config = sqlmesh_config(
             project_root=dbt_project_file.parent,
             dbt_profile_name=kwargs.pop("profile", None),
             dbt_target_name=kwargs.pop("target", None),
             variables=variables,
             threads=kwargs.pop("threads", None),
+            infer_state_schema_name=infer_state_schema_name,
         )
         if type(dbt_python_config) != config_type:
             dbt_python_config = convert_config_type(dbt_python_config, config_type)
diff --git a/sqlmesh/core/config/root.py b/sqlmesh/core/config/root.py
index 9b6fae63e3..211d271b01 100644
--- a/sqlmesh/core/config/root.py
+++ b/sqlmesh/core/config/root.py
@@ -36,6 +36,7 @@
 from sqlmesh.core.config.linter import LinterConfig as LinterConfig
 from sqlmesh.core.config.plan import PlanConfig
 from sqlmesh.core.config.run import RunConfig
+from sqlmesh.core.config.dbt import DbtConfig
 from sqlmesh.core.config.scheduler import (
     BuiltInSchedulerConfig,
     SchedulerConfig,
@@ -173,6 +174,7 @@ class Config(BaseConfig):
     linter: LinterConfig = LinterConfig()
     janitor: JanitorConfig = JanitorConfig()
     cache_dir: t.Optional[str] = None
+    dbt: t.Optional[DbtConfig] = None

     _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = {
         "gateways": UpdateStrategy.NESTED_UPDATE,
@@ -191,6 +193,7 @@ class Config(BaseConfig):
         "before_all": UpdateStrategy.EXTEND,
         "after_all": UpdateStrategy.EXTEND,
         "linter": UpdateStrategy.NESTED_UPDATE,
+        "dbt": UpdateStrategy.NESTED_UPDATE,
     }

     _connection_config_validator = connection_config_validator
diff --git a/sqlmesh/dbt/loader.py b/sqlmesh/dbt/loader.py
index 39973776a8..049c761ed1 100644
--- a/sqlmesh/dbt/loader.py
+++ b/sqlmesh/dbt/loader.py
@@ -11,6 +11,7 @@
     ConnectionConfig,
     GatewayConfig,
     ModelDefaultsConfig,
+    DbtConfig as RootDbtConfig,
 )
 from sqlmesh.core.environment import EnvironmentStatements
 from sqlmesh.core.loader import CacheBase, LoadedProject, Loader
@@ -51,6 +52,7 @@ def sqlmesh_config(
     variables: t.Optional[t.Dict[str, t.Any]] = None,
     threads: t.Optional[int] = None,
     register_comments: t.Optional[bool] = None,
+    infer_state_schema_name: bool = False,
     **kwargs: t.Any,
 ) -> Config:
     project_root = project_root or Path()
@@ -72,16 +74,40 @@ def sqlmesh_config(
         # the to_sqlmesh() function on TargetConfig maps self.threads -> concurrent_tasks
         profile.target.threads = threads

+    gateway_kwargs = {}
+    if infer_state_schema_name:
+        profile_name = context.profile_name
+
+        # Note: we deliberately isolate state based on the target *schema* and not the target name.
+        # It is assumed that the project will define a target, eg 'dev', and then in each user's own ~/.dbt/profiles.yml the schema
+        # for the 'dev' target is overridden to something user-specific, rather than making the target name itself user-specific.
+ # This means that the schema name is the indicator of isolated state, not the target name which may be re-used across multiple schemas. + target_schema = profile.target.schema_ + + # dbt-core doesn't allow schema to be undefined, but it does allow an empty string, and then just + # fails at runtime when `CREATE SCHEMA ""` doesn't work + if not target_schema: + raise ConfigError( + f"Target '{profile.target_name}' does not specify a schema.\n" + "A schema is required in order to infer where to store SQLMesh state" + ) + + inferred_state_schema_name = f"sqlmesh_state_{profile_name}_{target_schema}" + logger.info("Inferring state schema: %s", inferred_state_schema_name) + gateway_kwargs["state_schema"] = inferred_state_schema_name + return Config( loader=loader, model_defaults=model_defaults, variables=variables or {}, + dbt=RootDbtConfig(infer_state_schema_name=infer_state_schema_name), **{ "default_gateway": profile.target_name if "gateways" not in kwargs else "", "gateways": { profile.target_name: GatewayConfig( connection=profile.target.to_sqlmesh(**target_to_sqlmesh_args), state_connection=state_connection, + **gateway_kwargs, ) }, # type: ignore **kwargs, diff --git a/tests/dbt/test_config.py b/tests/dbt/test_config.py index b3ee0c422a..5dccd90ed2 100644 --- a/tests/dbt/test_config.py +++ b/tests/dbt/test_config.py @@ -15,6 +15,7 @@ from sqlmesh.core.dialect import jinja_query from sqlmesh.core.model import SqlModel from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange +from sqlmesh.core.state_sync import CachingStateSync, EngineAdapterStateSync from sqlmesh.dbt.builtin import Api from sqlmesh.dbt.column import ColumnConfig from sqlmesh.dbt.common import Dependencies @@ -46,7 +47,8 @@ ) from sqlmesh.dbt.test import TestConfig from sqlmesh.utils.errors import ConfigError -from sqlmesh.utils.yaml import load as yaml_load +from sqlmesh.utils.yaml import load as yaml_load, dump as yaml_dump +from tests.dbt.conftest import EmptyProjectCreator pytestmark = pytest.mark.dbt @@ -1211,3 +1213,37 @@ def test_empty_vars_config(tmp_path): # Verify the variables are empty (not causing any issues) assert project.packages["test_empty_vars"].variables == {} assert project.context.variables == {} + + +def test_infer_state_schema_name(create_empty_project: EmptyProjectCreator): + project_dir, _ = create_empty_project("test_foo", "dev") + + # infer_state_schema_name defaults to False if omitted + config = sqlmesh_config(project_root=project_dir) + assert config.dbt + assert not config.dbt.infer_state_schema_name + assert config.get_state_schema() == "sqlmesh" + + # create_empty_project() uses the default dbt template for sqlmesh yaml config which + # sets infer_state_schema_name=True + ctx = Context(paths=[project_dir]) + assert ctx.config.dbt + assert ctx.config.dbt.infer_state_schema_name + assert ctx.config.get_state_schema() == "sqlmesh_state_test_foo_main" + assert isinstance(ctx.state_sync, CachingStateSync) + assert isinstance(ctx.state_sync.state_sync, EngineAdapterStateSync) + assert ctx.state_sync.state_sync.schema == "sqlmesh_state_test_foo_main" + + # If the user deliberately overrides state_schema then we should respect this choice + config_file = project_dir / "sqlmesh.yaml" + config_yaml = yaml_load(config_file) + config_yaml["gateways"] = {"dev": {"state_schema": "state_override"}} + config_file.write_text(yaml_dump(config_yaml)) + + ctx = Context(paths=[project_dir]) + assert ctx.config.dbt + assert ctx.config.dbt.infer_state_schema_name + assert 
ctx.config.get_state_schema() == "state_override" + assert isinstance(ctx.state_sync, CachingStateSync) + assert isinstance(ctx.state_sync.state_sync, EngineAdapterStateSync) + assert ctx.state_sync.state_sync.schema == "state_override" diff --git a/tests/dbt/test_integration.py b/tests/dbt/test_integration.py index e1f051dbcf..ab22bf7826 100644 --- a/tests/dbt/test_integration.py +++ b/tests/dbt/test_integration.py @@ -19,7 +19,8 @@ from sqlmesh.core.config.connection import DuckDBConnectionConfig from sqlmesh.core.engine_adapter import DuckDBEngineAdapter from sqlmesh.utils.pandas import columns_to_types_from_df -from sqlmesh.utils.yaml import YAML +from sqlmesh.utils.yaml import YAML, load as yaml_load, dump as yaml_dump +from sqlmesh_dbt.operations import init_project_if_required from tests.utils.pandas import compare_dataframes, create_df # Some developers had issues with this test freezing locally so we mark it as cicdonly @@ -604,3 +605,50 @@ def test_dbt_node_info(jaffle_shop_duckdb_context: Context): relationship_audit.node.dbt_node_info.name == "relationships_orders_customer_id__customer_id__ref_customers_" ) + + +def test_state_schema_isolation_per_target(jaffle_shop_duckdb: Path): + profiles_file = jaffle_shop_duckdb / "profiles.yml" + + profiles_yml = yaml_load(profiles_file) + + # make prod / dev config identical with the exception of a different default schema to simulate using the same warehouse + profiles_yml["jaffle_shop"]["outputs"]["prod"] = { + **profiles_yml["jaffle_shop"]["outputs"]["dev"] + } + profiles_yml["jaffle_shop"]["outputs"]["prod"]["schema"] = "prod_schema" + profiles_yml["jaffle_shop"]["outputs"]["dev"]["schema"] = "dev_schema" + + profiles_file.write_text(yaml_dump(profiles_yml)) + + init_project_if_required(jaffle_shop_duckdb) + + # start off with the prod target + prod_ctx = Context(paths=[jaffle_shop_duckdb], config_loader_kwargs={"target": "prod"}) + assert prod_ctx.config.get_state_schema() == "sqlmesh_state_jaffle_shop_prod_schema" + assert all("prod_schema" in fqn for fqn in prod_ctx.models) + assert prod_ctx.plan(auto_apply=True).has_changes + assert not prod_ctx.plan(auto_apply=True).has_changes + + # dev target should have changes - new state separate from prod + dev_ctx = Context(paths=[jaffle_shop_duckdb], config_loader_kwargs={"target": "dev"}) + assert dev_ctx.config.get_state_schema() == "sqlmesh_state_jaffle_shop_dev_schema" + assert all("dev_schema" in fqn for fqn in dev_ctx.models) + assert dev_ctx.plan(auto_apply=True).has_changes + assert not dev_ctx.plan(auto_apply=True).has_changes + + # when no target is explicitly specified, the dev target should be used because it is the default set in profiles.yml + assert profiles_yml["jaffle_shop"]["target"] == "dev" + default_ctx = Context(paths=[jaffle_shop_duckdb]) + assert default_ctx.config.get_state_schema() == "sqlmesh_state_jaffle_shop_dev_schema" + assert all("dev_schema" in fqn for fqn in default_ctx.models) + assert not default_ctx.plan(auto_apply=True).has_changes + + # an explicit state schema override set in `sqlmesh.yaml` should take precedence + sqlmesh_yaml_file = jaffle_shop_duckdb / "sqlmesh.yaml" + sqlmesh_yaml = yaml_load(sqlmesh_yaml_file) + sqlmesh_yaml["gateways"] = {"dev": {"state_schema": "sqlmesh_dev_state_override"}} + sqlmesh_yaml_file.write_text(yaml_dump(sqlmesh_yaml)) + default_ctx = Context(paths=[jaffle_shop_duckdb]) + assert default_ctx.config.get_state_schema() == "sqlmesh_dev_state_override" + assert all("dev_schema" in fqn for fqn in default_ctx.models) diff --git 
a/tests/fixtures/dbt/empty_project/profiles.yml b/tests/fixtures/dbt/empty_project/profiles.yml index b352fc5792..adae09e9c6 100644 --- a/tests/fixtures/dbt/empty_project/profiles.yml +++ b/tests/fixtures/dbt/empty_project/profiles.yml @@ -3,7 +3,11 @@ empty_project: target: __DEFAULT_TARGET__ outputs: - duckdb: + __DEFAULT_TARGET__: type: duckdb + # database is required for dbt < 1.5 where our adapter deliberately doesn't infer the database from the path and + # defaults it to "main", which raises a "project catalog doesnt match context catalog" error + # ref: https://github.com/TobikoData/sqlmesh/pull/1109 + database: empty_project path: 'empty_project.duckdb' threads: 4 From 9398485be2f6969c97180723b81937cbcf455d00 Mon Sep 17 00:00:00 2001 From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com> Date: Wed, 8 Oct 2025 11:58:52 +0300 Subject: [PATCH 062/173] Chore: Fix typo in dbt error message (#5507) --- sqlmesh/dbt/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlmesh/dbt/source.py b/sqlmesh/dbt/source.py index efafbf1642..832ed0e156 100644 --- a/sqlmesh/dbt/source.py +++ b/sqlmesh/dbt/source.py @@ -79,7 +79,7 @@ def canonical_name(self, context: DbtContext) -> str: relation = source(self.source_name_, self.name) except Exception as e: raise ConfigError( - f"'source' macro failed for '{self.config_name}' with exeception '{e}'." + f"'source' macro failed for '{self.config_name}' with exception '{e}'." ) relation = relation.quote( From 46aaf78a01cf9ccc471b26290f088ca6202b3198 Mon Sep 17 00:00:00 2001 From: Tori Wei <41123940+toriwei@users.noreply.github.com> Date: Wed, 8 Oct 2025 08:48:04 -0700 Subject: [PATCH 063/173] fix: validate data_type for partition_by config (#5491) --- sqlmesh/dbt/model.py | 8 ++++++++ tests/dbt/test_transformation.py | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/sqlmesh/dbt/model.py b/sqlmesh/dbt/model.py index 09c410561d..d882f94942 100644 --- a/sqlmesh/dbt/model.py +++ b/sqlmesh/dbt/model.py @@ -215,6 +215,14 @@ def _validate_partition_by( ): granularity = v["granularity"] raise ConfigError(f"Unexpected granularity '{granularity}' in partition_by '{v}'.") + if "data_type" in v and v["data_type"].lower() not in ( + "timestamp", + "date", + "datetime", + "int64", + ): + data_type = v["data_type"] + raise ConfigError(f"Unexpected data_type '{data_type}' in partition_by '{v}'.") return {"data_type": "date", "granularity": "day", **v} raise ConfigError(f"Invalid format for partition_by '{v}'") diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py index dd69f46200..e519713d26 100644 --- a/tests/dbt/test_transformation.py +++ b/tests/dbt/test_transformation.py @@ -1881,6 +1881,17 @@ def test_partition_by(sushi_test_project: Project): ) assert model_config.to_sqlmesh(context).partitioned_by == [] + with pytest.raises(ConfigError, match="Unexpected data_type 'string' in partition_by"): + ModelConfig( + name="model", + alias="model", + schema="test", + package_name="package", + materialized="table", + partition_by={"field": "ds", "data_type": "string"}, + sql="""SELECT 1 AS one, ds FROM foo""", + ) + + @pytest.mark.xdist_group("dbt_manifest") def test_partition_by_none(sushi_test_project: Project): From 3535195aa272ccc5eac4d3ca64cdaea148f2ddc0 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Wed, 8 Oct 2025 18:59:52 +0300 Subject: [PATCH 064/173] Chore!: bump sqlglot to v27.22.0 (#5508) 
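The `~=27.24.2` constraint below is a PEP 440 "compatible release" specifier. A minimal
self-check of what it permits, assuming the widely available `packaging` library:

    from packaging.specifiers import SpecifierSet

    # "~=27.24.2" is equivalent to ">=27.24.2, ==27.24.*": later patch releases
    # of sqlglot are accepted, the next minor release is not.
    spec = SpecifierSet("~=27.24.2")
    assert "27.24.9" in spec
    assert "27.25.0" not in spec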
--- pyproject.toml | 2 +- tests/core/test_model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 053b242813..71c9d62bbd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "requests", "rich[jupyter]", "ruamel.yaml", - "sqlglot[rs]~=27.20.0", + "sqlglot[rs]~=27.24.2", "tenacity", "time-machine", "json-stream" diff --git a/tests/core/test_model.py b/tests/core/test_model.py index f6fc448b79..c3feef6095 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -919,7 +919,7 @@ def test_json_serde(): assert ( SqlModel.parse_obj(model_json_parsed).render_query().sql("duckdb") - == 'SELECT REGEXP_MATCHES("x", "y") AS "c"' + == 'SELECT REGEXP_FULL_MATCH("x", "y") AS "c"' ) From bbdbd4800493ef17e536d04e15b9129303b81b1f Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Wed, 8 Oct 2025 09:40:58 -0700 Subject: [PATCH 065/173] feat: batch expired snapshots (#5486) --- docs/reference/configuration.md | 7 +- sqlmesh/core/config/janitor.py | 12 + sqlmesh/core/context.py | 20 +- sqlmesh/core/state_sync/base.py | 51 +- sqlmesh/core/state_sync/cache.py | 13 +- sqlmesh/core/state_sync/common.py | 280 ++++++++++- sqlmesh/core/state_sync/db/facade.py | 37 +- sqlmesh/core/state_sync/db/snapshot.py | 192 ++++---- tests/core/state_sync/test_state_sync.py | 563 +++++++++++++++++++++-- tests/core/test_context.py | 2 +- 10 files changed, 1004 insertions(+), 173 deletions(-) diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 676f9d7389..b13438ee2d 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -125,9 +125,10 @@ Formatting settings for the `sqlmesh format` command and UI. Configuration for the `sqlmesh janitor` command. -| Option | Description | Type | Required | -|--------------------------|----------------------------------------------------------------------------------------------------------------------------|:-------:|:--------:| -| `warn_on_delete_failure` | Whether to warn instead of erroring if the janitor fails to delete the expired environment schema / views (Default: False) | boolean | N | +| Option | Description | Type | Required | +|---------------------------------|----------------------------------------------------------------------------------------------------------------------------|:-------:|:--------:| +| `warn_on_delete_failure` | Whether to warn instead of erroring if the janitor fails to delete the expired environment schema / views (Default: False) | boolean | N | +| `expired_snapshots_batch_size` | Maximum number of expired snapshots to clean in a single batch (Default: 200) | int | N | ## UI diff --git a/sqlmesh/core/config/janitor.py b/sqlmesh/core/config/janitor.py index d288c90b3e..0f1c953bc0 100644 --- a/sqlmesh/core/config/janitor.py +++ b/sqlmesh/core/config/janitor.py @@ -1,7 +1,9 @@ from __future__ import annotations +import typing as t from sqlmesh.core.config.base import BaseConfig +from sqlmesh.utils.pydantic import field_validator class JanitorConfig(BaseConfig): @@ -9,6 +11,16 @@ class JanitorConfig(BaseConfig): Args: warn_on_delete_failure: Whether to warn instead of erroring if the janitor fails to delete the expired environment schema / views. + expired_snapshots_batch_size: Maximum number of expired snapshots to clean in a single batch. 
""" warn_on_delete_failure: bool = False + expired_snapshots_batch_size: t.Optional[int] = None + + @field_validator("expired_snapshots_batch_size", mode="before") + @classmethod + def _validate_batch_size(cls, value: int) -> int: + batch_size = int(value) + if batch_size <= 0: + raise ValueError("expired_snapshots_batch_size must be greater than 0") + return batch_size diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index e31a04fe81..bd8647f811 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -109,6 +109,7 @@ StateSync, cleanup_expired_views, ) +from sqlmesh.core.state_sync.common import delete_expired_snapshots from sqlmesh.core.table_diff import TableDiff from sqlmesh.core.test import ( ModelTextTestResult, @@ -2852,19 +2853,14 @@ def _run_janitor(self, ignore_ttl: bool = False) -> None: # Clean up expired environments by removing their views and schemas self._cleanup_environments(current_ts=current_ts) - cleanup_targets = self.state_sync.get_expired_snapshots( - ignore_ttl=ignore_ttl, current_ts=current_ts - ) - - # Remove the expired snapshots tables - self.snapshot_evaluator.cleanup( - target_snapshots=cleanup_targets, - on_complete=self.console.update_cleanup_progress, + delete_expired_snapshots( + self.state_sync, + self.snapshot_evaluator, + current_ts=current_ts, + ignore_ttl=ignore_ttl, + console=self.console, + batch_size=self.config.janitor.expired_snapshots_batch_size, ) - - # Delete the expired snapshot records from the state sync - self.state_sync.delete_expired_snapshots(ignore_ttl=ignore_ttl, current_ts=current_ts) - self.state_sync.compact_intervals() def _cleanup_environments(self, current_ts: t.Optional[int] = None) -> None: diff --git a/sqlmesh/core/state_sync/base.py b/sqlmesh/core/state_sync/base.py index 2f8a68dd4a..3c8c72845d 100644 --- a/sqlmesh/core/state_sync/base.py +++ b/sqlmesh/core/state_sync/base.py @@ -11,7 +11,6 @@ from sqlmesh import migrations from sqlmesh.core.environment import ( Environment, - EnvironmentNamingInfo, EnvironmentStatements, EnvironmentSummary, ) @@ -21,8 +20,6 @@ SnapshotIdLike, SnapshotIdAndVersionLike, SnapshotInfoLike, - SnapshotTableCleanupTask, - SnapshotTableInfo, SnapshotNameVersion, SnapshotIdAndVersion, ) @@ -30,8 +27,13 @@ from sqlmesh.utils import major_minor from sqlmesh.utils.date import TimeLike from sqlmesh.utils.errors import SQLMeshError -from sqlmesh.utils.pydantic import PydanticModel, ValidationInfo, field_validator -from sqlmesh.core.state_sync.common import StateStream +from sqlmesh.utils.pydantic import PydanticModel, field_validator +from sqlmesh.core.state_sync.common import ( + StateStream, + ExpiredSnapshotBatch, + PromotionResult, + ExpiredBatchRange, +) logger = logging.getLogger(__name__) @@ -72,20 +74,6 @@ def _schema_version_validator(cls, v: t.Any) -> int: SCHEMA_VERSION: int = MIN_SCHEMA_VERSION + len(MIGRATIONS) - 1 -class PromotionResult(PydanticModel): - added: t.List[SnapshotTableInfo] - removed: t.List[SnapshotTableInfo] - removed_environment_naming_info: t.Optional[EnvironmentNamingInfo] - - @field_validator("removed_environment_naming_info") - def _validate_removed_environment_naming_info( - cls, v: t.Optional[EnvironmentNamingInfo], info: ValidationInfo - ) -> t.Optional[EnvironmentNamingInfo]: - if v and not info.data.get("removed"): - raise ValueError("removed_environment_naming_info must be None if removed is empty") - return v - - class StateReader(abc.ABC): """Abstract base class for read-only operations on snapshot and environment state.""" @@ -315,15 
+303,21 @@ def export(self, environment_names: t.Optional[t.List[str]] = None) -> StateStre @abc.abstractmethod def get_expired_snapshots( - self, current_ts: t.Optional[int] = None, ignore_ttl: bool = False - ) -> t.List[SnapshotTableCleanupTask]: - """Aggregates the id's of the expired snapshots and creates a list of table cleanup tasks. + self, + *, + batch_range: ExpiredBatchRange, + current_ts: t.Optional[int] = None, + ignore_ttl: bool = False, + ) -> t.Optional[ExpiredSnapshotBatch]: + """Returns a single batch of expired snapshots ordered by (updated_ts, name, identifier). - Expired snapshots are snapshots that have exceeded their time-to-live - and are no longer in use within an environment. + Args: + current_ts: Timestamp used to evaluate expiration. + ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced). + batch_range: The range of the batch to fetch. Returns: - The list of table cleanup tasks. + A batch describing expired snapshots or None if no snapshots are pending cleanup. """ @abc.abstractmethod @@ -363,7 +357,10 @@ def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None: @abc.abstractmethod def delete_expired_snapshots( - self, ignore_ttl: bool = False, current_ts: t.Optional[int] = None + self, + batch_range: ExpiredBatchRange, + ignore_ttl: bool = False, + current_ts: t.Optional[int] = None, ) -> None: """Removes expired snapshots. @@ -371,8 +368,10 @@ def delete_expired_snapshots( and are no longer in use within an environment. Args: + batch_range: The range of snapshots to delete in this batch. ignore_ttl: Ignore the TTL on the snapshot when considering it expired. This has the effect of deleting all snapshots that are not referenced in any environment + current_ts: Timestamp used to evaluate expiration. 
""" @abc.abstractmethod diff --git a/sqlmesh/core/state_sync/cache.py b/sqlmesh/core/state_sync/cache.py index 3de4e7bf51..77f3fc6ba5 100644 --- a/sqlmesh/core/state_sync/cache.py +++ b/sqlmesh/core/state_sync/cache.py @@ -12,6 +12,7 @@ ) from sqlmesh.core.snapshot.definition import Interval, SnapshotIntervals from sqlmesh.core.state_sync.base import DelegatingStateSync, StateSync +from sqlmesh.core.state_sync.common import ExpiredBatchRange from sqlmesh.utils.date import TimeLike, now_timestamp @@ -108,11 +109,17 @@ def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None: self.state_sync.delete_snapshots(snapshot_ids) def delete_expired_snapshots( - self, ignore_ttl: bool = False, current_ts: t.Optional[int] = None + self, + batch_range: ExpiredBatchRange, + ignore_ttl: bool = False, + current_ts: t.Optional[int] = None, ) -> None: - current_ts = current_ts or now_timestamp() self.snapshot_cache.clear() - self.state_sync.delete_expired_snapshots(current_ts=current_ts, ignore_ttl=ignore_ttl) + self.state_sync.delete_expired_snapshots( + batch_range=batch_range, + ignore_ttl=ignore_ttl, + current_ts=current_ts, + ) def add_snapshots_intervals(self, snapshots_intervals: t.Sequence[SnapshotIntervals]) -> None: for snapshot_intervals in snapshots_intervals: diff --git a/sqlmesh/core/state_sync/common.py b/sqlmesh/core/state_sync/common.py index cd8c389e33..3fdd0bc015 100644 --- a/sqlmesh/core/state_sync/common.py +++ b/sqlmesh/core/state_sync/common.py @@ -7,21 +7,31 @@ import abc from dataclasses import dataclass + +from pydantic_core.core_schema import ValidationInfo from sqlglot import exp from sqlmesh.core.console import Console from sqlmesh.core.dialect import schema_ -from sqlmesh.utils.pydantic import PydanticModel -from sqlmesh.core.environment import Environment, EnvironmentStatements +from sqlmesh.utils.pydantic import PydanticModel, field_validator +from sqlmesh.core.environment import Environment, EnvironmentStatements, EnvironmentNamingInfo from sqlmesh.utils.errors import SQLMeshError -from sqlmesh.core.snapshot import Snapshot +from sqlmesh.core.snapshot import ( + Snapshot, + SnapshotEvaluator, + SnapshotId, + SnapshotTableCleanupTask, + SnapshotTableInfo, +) if t.TYPE_CHECKING: from sqlmesh.core.engine_adapter.base import EngineAdapter - from sqlmesh.core.state_sync.base import Versions + from sqlmesh.core.state_sync.base import Versions, StateReader, StateSync logger = logging.getLogger(__name__) +EXPIRED_SNAPSHOT_DEFAULT_BATCH_SIZE = 200 + def cleanup_expired_views( default_adapter: EngineAdapter, @@ -215,3 +225,265 @@ def __iter__(self) -> t.Iterator[StateStreamContents]: yield EnvironmentsChunk(environments) return _StateStream() + + +class ExpiredBatchRange(PydanticModel): + start: RowBoundary + end: t.Union[RowBoundary, LimitBoundary] + + @classmethod + def init_batch_range(cls, batch_size: int) -> ExpiredBatchRange: + return ExpiredBatchRange( + start=RowBoundary.lowest_boundary(), + end=LimitBoundary(batch_size=batch_size), + ) + + @classmethod + def all_batch_range(cls) -> ExpiredBatchRange: + return ExpiredBatchRange( + start=RowBoundary.lowest_boundary(), + end=RowBoundary.highest_boundary(), + ) + + @classmethod + def _expanded_tuple_comparison( + cls, + columns: t.List[exp.Column], + values: t.List[exp.Literal], + operator: t.Type[exp.Expression], + ) -> exp.Expression: + """Generate expanded tuple comparison that works across all SQL engines. 
+ + Converts tuple comparisons like (a, b, c) OP (x, y, z) into an expanded form + that's compatible with all SQL engines, since native tuple comparisons have + inconsistent support across engines (especially DuckDB, MySQL, SQLite). + + Repro of problem with DuckDB: + "SELECT * FROM VALUES(1,'2') as test(a,b) WHERE ((a, b) > (1, 'foo')) AND ((a, b) <= (10, 'baz'))" + + Args: + columns: List of column expressions to compare + values: List of value expressions to compare against + operator: The comparison operator class (exp.GT, exp.GTE, exp.LT, exp.LTE) + + Examples: + (a, b, c) > (x, y, z) expands to: + a > x OR (a = x AND b > y) OR (a = x AND b = y AND c > z) + + (a, b, c) <= (x, y, z) expands to: + a < x OR (a = x AND b < y) OR (a = x AND b = y AND c <= z) + + (a, b, c) >= (x, y, z) expands to: + a > x OR (a = x AND b > y) OR (a = x AND b = y AND c >= z) + + Returns: + An expanded OR expression representing the tuple comparison + """ + if operator not in (exp.GT, exp.GTE, exp.LT, exp.LTE): + raise ValueError(f"Unsupported operator: {operator}. Use GT, GTE, LT, or LTE.") + + # For <= and >=, we use the strict operator for all but the last column + # e.g., (a, b) <= (x, y) becomes: a < x OR (a = x AND b <= y) + # For < and >, we use the strict operator throughout + # e.g., (a, b) > (x, y) becomes: a > x OR (a = x AND b > y) + strict_operator: t.Type[exp.Expression] + final_operator: t.Type[exp.Expression] + + if operator in (exp.LTE, exp.GTE): + # For inclusive operators (<=, >=), use strict form for intermediate columns + # but keep inclusive form for the last column + strict_operator = exp.LT if operator == exp.LTE else exp.GT + final_operator = operator # Keep LTE/GTE for last column + else: + # For strict operators (<, >), use them throughout + strict_operator = operator + final_operator = operator + + conditions: t.List[exp.Expression] = [] + for i in range(len(columns)): + # Build equality conditions for all columns before current + equality_conditions = [exp.EQ(this=columns[j], expression=values[j]) for j in range(i)] + + # Use the final operator for the last column, strict for others + comparison_op = final_operator if i == len(columns) - 1 else strict_operator + comparison_condition = comparison_op(this=columns[i], expression=values[i]) + + if equality_conditions: + conditions.append(exp.and_(*equality_conditions, comparison_condition)) + else: + conditions.append(comparison_condition) + + return exp.or_(*conditions) if len(conditions) > 1 else conditions[0] + + @property + def where_filter(self) -> exp.Expression: + # Use expanded tuple comparisons for cross-engine compatibility + # Native tuple comparisons like (a, b) > (x, y) don't work reliably across all SQL engines + columns = [ + exp.column("updated_ts"), + exp.column("name"), + exp.column("identifier"), + ] + start_values = [ + exp.Literal.number(self.start.updated_ts), + exp.Literal.string(self.start.name), + exp.Literal.string(self.start.identifier), + ] + + start_condition = self._expanded_tuple_comparison(columns, start_values, exp.GT) + + range_filter: exp.Expression + if isinstance(self.end, RowBoundary): + end_values = [ + exp.Literal.number(self.end.updated_ts), + exp.Literal.string(self.end.name), + exp.Literal.string(self.end.identifier), + ] + end_condition = self._expanded_tuple_comparison(columns, end_values, exp.LTE) + range_filter = exp.and_(start_condition, end_condition) + else: + range_filter = start_condition + return range_filter + + +class RowBoundary(PydanticModel): + updated_ts: int + name: str + 
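    # Together, updated_ts, name and identifier form the keyset cursor compared by
    # where_filter; identifier breaks ties when updated_ts and name are equal.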
identifier: str + + @classmethod + def lowest_boundary(cls) -> RowBoundary: + return RowBoundary(updated_ts=0, name="", identifier="") + + @classmethod + def highest_boundary(cls) -> RowBoundary: + # 9999-12-31T23:59:59.999Z in epoch milliseconds + return RowBoundary(updated_ts=253_402_300_799_999, name="", identifier="") + + +class LimitBoundary(PydanticModel): + batch_size: int + + @classmethod + def init_batch_boundary(cls, batch_size: int) -> LimitBoundary: + return LimitBoundary(batch_size=batch_size) + + +class PromotionResult(PydanticModel): + added: t.List[SnapshotTableInfo] + removed: t.List[SnapshotTableInfo] + removed_environment_naming_info: t.Optional[EnvironmentNamingInfo] + + @field_validator("removed_environment_naming_info") + def _validate_removed_environment_naming_info( + cls, v: t.Optional[EnvironmentNamingInfo], info: ValidationInfo + ) -> t.Optional[EnvironmentNamingInfo]: + if v and not info.data.get("removed"): + raise ValueError("removed_environment_naming_info must be None if removed is empty") + return v + + +class ExpiredSnapshotBatch(PydanticModel): + """A batch of expired snapshots to be cleaned up.""" + + expired_snapshot_ids: t.Set[SnapshotId] + cleanup_tasks: t.List[SnapshotTableCleanupTask] + batch_range: ExpiredBatchRange + + +def iter_expired_snapshot_batches( + state_reader: StateReader, + *, + current_ts: int, + ignore_ttl: bool = False, + batch_size: t.Optional[int] = None, +) -> t.Iterator[ExpiredSnapshotBatch]: + """Yields expired snapshot batches. + + Args: + state_reader: StateReader instance to query expired snapshots from. + current_ts: Timestamp used to evaluate expiration. + ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced). + batch_size: Maximum number of snapshots to fetch per batch. + """ + + batch_size = batch_size if batch_size is not None else EXPIRED_SNAPSHOT_DEFAULT_BATCH_SIZE + batch_range = ExpiredBatchRange.init_batch_range(batch_size=batch_size) + + while True: + batch = state_reader.get_expired_snapshots( + current_ts=current_ts, + ignore_ttl=ignore_ttl, + batch_range=batch_range, + ) + + if batch is None: + return + + yield batch + + assert isinstance(batch.batch_range.end, RowBoundary), ( + "Only RowBoundary is supported for pagination currently" + ) + batch_range = ExpiredBatchRange( + start=batch.batch_range.end, + end=LimitBoundary(batch_size=batch_size), + ) + + +def delete_expired_snapshots( + state_sync: StateSync, + snapshot_evaluator: SnapshotEvaluator, + *, + current_ts: int, + ignore_ttl: bool = False, + batch_size: t.Optional[int] = None, + console: t.Optional[Console] = None, +) -> None: + """Delete all expired snapshots in batches. + + This helper function encapsulates the logic for deleting expired snapshots in batches, + eliminating code duplication across different use cases. + + Args: + state_sync: StateSync instance to query and delete expired snapshots from. + snapshot_evaluator: SnapshotEvaluator instance to clean up tables associated with snapshots. + current_ts: Timestamp used to evaluate expiration. + ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced). + batch_size: Maximum number of snapshots to fetch per batch. + console: Optional console for reporting progress.
+ """ + num_expired_snapshots = 0 + for batch in iter_expired_snapshot_batches( + state_reader=state_sync, + current_ts=current_ts, + ignore_ttl=ignore_ttl, + batch_size=batch_size, + ): + end_info = ( + f"updated_ts={batch.batch_range.end.updated_ts}" + if isinstance(batch.batch_range.end, RowBoundary) + else f"limit={batch.batch_range.end.batch_size}" + ) + logger.info( + "Processing batch of size %s with end %s", + len(batch.expired_snapshot_ids), + end_info, + ) + snapshot_evaluator.cleanup( + target_snapshots=batch.cleanup_tasks, + on_complete=console.update_cleanup_progress if console else None, + ) + state_sync.delete_expired_snapshots( + batch_range=ExpiredBatchRange( + start=RowBoundary.lowest_boundary(), + end=batch.batch_range.end, + ), + ignore_ttl=ignore_ttl, + ) + logger.info("Cleaned up expired snapshots batch") + num_expired_snapshots += len(batch.expired_snapshot_ids) + logger.info("Cleaned up %s expired snapshots", num_expired_snapshots) diff --git a/sqlmesh/core/state_sync/db/facade.py b/sqlmesh/core/state_sync/db/facade.py index 3c23ef339c..49f7b5b92f 100644 --- a/sqlmesh/core/state_sync/db/facade.py +++ b/sqlmesh/core/state_sync/db/facade.py @@ -35,7 +35,6 @@ SnapshotInfoLike, SnapshotIntervals, SnapshotNameVersion, - SnapshotTableCleanupTask, SnapshotTableInfo, start_date, ) @@ -43,7 +42,6 @@ Interval, ) from sqlmesh.core.state_sync.base import ( - PromotionResult, StateSync, Versions, ) @@ -55,6 +53,9 @@ StateStream, chunk_iterable, EnvironmentWithStatements, + ExpiredSnapshotBatch, + PromotionResult, + ExpiredBatchRange, ) from sqlmesh.core.state_sync.db.interval import IntervalState from sqlmesh.core.state_sync.db.environment import EnvironmentState @@ -261,11 +262,18 @@ def invalidate_environment(self, name: str, protect_prod: bool = True) -> None: self.environment_state.invalidate_environment(name, protect_prod) def get_expired_snapshots( - self, current_ts: t.Optional[int] = None, ignore_ttl: bool = False - ) -> t.List[SnapshotTableCleanupTask]: + self, + *, + batch_range: ExpiredBatchRange, + current_ts: t.Optional[int] = None, + ignore_ttl: bool = False, + ) -> t.Optional[ExpiredSnapshotBatch]: current_ts = current_ts or now_timestamp() return self.snapshot_state.get_expired_snapshots( - self.environment_state.get_environments(), current_ts=current_ts, ignore_ttl=ignore_ttl + environments=self.environment_state.get_environments(), + current_ts=current_ts, + ignore_ttl=ignore_ttl, + batch_range=batch_range, ) def get_expired_environments(self, current_ts: int) -> t.List[EnvironmentSummary]: @@ -273,14 +281,19 @@ def get_expired_environments(self, current_ts: int) -> t.List[EnvironmentSummary @transactional() def delete_expired_snapshots( - self, ignore_ttl: bool = False, current_ts: t.Optional[int] = None + self, + batch_range: ExpiredBatchRange, + ignore_ttl: bool = False, + current_ts: t.Optional[int] = None, ) -> None: - current_ts = current_ts or now_timestamp() - for expired_snapshot_ids, cleanup_targets in self.snapshot_state._get_expired_snapshots( - self.environment_state.get_environments(), ignore_ttl=ignore_ttl, current_ts=current_ts - ): - self.snapshot_state.delete_snapshots(expired_snapshot_ids) - self.interval_state.cleanup_intervals(cleanup_targets, expired_snapshot_ids) + batch = self.get_expired_snapshots( + ignore_ttl=ignore_ttl, + current_ts=current_ts, + batch_range=batch_range, + ) + if batch and batch.expired_snapshot_ids: + self.snapshot_state.delete_snapshots(batch.expired_snapshot_ids) + 
self.interval_state.cleanup_intervals(batch.cleanup_tasks, batch.expired_snapshot_ids) @transactional() def delete_expired_environments( diff --git a/sqlmesh/core/state_sync/db/snapshot.py b/sqlmesh/core/state_sync/db/snapshot.py index 4a8b2c44c5..4565990d65 100644 --- a/sqlmesh/core/state_sync/db/snapshot.py +++ b/sqlmesh/core/state_sync/db/snapshot.py @@ -14,7 +14,6 @@ snapshot_id_filter, fetchone, fetchall, - create_batches, ) from sqlmesh.core.environment import Environment from sqlmesh.core.model import SeedModel, ModelKindName @@ -30,6 +29,12 @@ SnapshotId, SnapshotFingerprint, ) +from sqlmesh.core.state_sync.common import ( + RowBoundary, + ExpiredSnapshotBatch, + ExpiredBatchRange, + LimitBoundary, +) from sqlmesh.utils.migration import index_text_type, blob_text_type from sqlmesh.utils.date import now_timestamp, TimeLike, to_timestamp from sqlmesh.utils import unique @@ -43,9 +48,6 @@ class SnapshotState: SNAPSHOT_BATCH_SIZE = 1000 - # Use a smaller batch size for expired snapshots to account for fetching - # of all snapshots that share the same version. - EXPIRED_SNAPSHOT_BATCH_SIZE = 200 def __init__( self, @@ -166,47 +168,19 @@ def get_expired_snapshots( self, environments: t.Iterable[Environment], current_ts: int, - ignore_ttl: bool = False, - ) -> t.List[SnapshotTableCleanupTask]: - """Aggregates the id's of the expired snapshots and creates a list of table cleanup tasks. - - Expired snapshots are snapshots that have exceeded their time-to-live - and are no longer in use within an environment. - - Returns: - The set of expired snapshot ids. - The list of table cleanup tasks. - """ - all_cleanup_targets = [] - for _, cleanup_targets in self._get_expired_snapshots( - environments=environments, - current_ts=current_ts, - ignore_ttl=ignore_ttl, - ): - all_cleanup_targets.extend(cleanup_targets) - return all_cleanup_targets - - def _get_expired_snapshots( - self, - environments: t.Iterable[Environment], - current_ts: int, - ignore_ttl: bool = False, - ) -> t.Iterator[t.Tuple[t.Set[SnapshotId], t.List[SnapshotTableCleanupTask]]]: - expired_query = exp.select("name", "identifier", "version").from_(self.snapshots_table) + ignore_ttl: bool, + batch_range: ExpiredBatchRange, + ) -> t.Optional[ExpiredSnapshotBatch]: + expired_query = exp.select("name", "identifier", "version", "updated_ts").from_( + self.snapshots_table + ) if not ignore_ttl: expired_query = expired_query.where( (exp.column("updated_ts") + exp.column("ttl_ms")) <= current_ts ) - expired_candidates = { - SnapshotId(name=name, identifier=identifier): SnapshotNameVersion( - name=name, version=version - ) - for name, identifier, version in fetchall(self.engine_adapter, expired_query) - } - if not expired_candidates: - return + expired_query = expired_query.where(batch_range.where_filter) promoted_snapshot_ids = { snapshot.snapshot_id @@ -214,63 +188,111 @@ def _get_expired_snapshots( for snapshot in environment.snapshots } + if promoted_snapshot_ids: + not_in_conditions = [ + exp.not_(condition) + for condition in snapshot_id_filter( + self.engine_adapter, + promoted_snapshot_ids, + batch_size=self.SNAPSHOT_BATCH_SIZE, + ) + ] + expired_query = expired_query.where(exp.and_(*not_in_conditions)) + + expired_query = expired_query.order_by( + exp.column("updated_ts"), exp.column("name"), exp.column("identifier") + ) + + if isinstance(batch_range.end, LimitBoundary): + expired_query = expired_query.limit(batch_range.end.batch_size) + + rows = fetchall(self.engine_adapter, expired_query) + + if not rows: + return None + + 
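        # Map each expired row to its name/version pair; the versions are used below to find
        # snapshots sharing the same physical table before deciding what can be cleaned up.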
expired_candidates = { + SnapshotId(name=name, identifier=identifier): SnapshotNameVersion( + name=name, version=version + ) + for name, identifier, version, _ in rows + } + if not expired_candidates: + return None + def _is_snapshot_used(snapshot: SnapshotIdAndVersion) -> bool: return ( snapshot.snapshot_id in promoted_snapshot_ids or snapshot.snapshot_id not in expired_candidates ) - unique_expired_versions = unique(expired_candidates.values()) - version_batches = create_batches( - unique_expired_versions, batch_size=self.EXPIRED_SNAPSHOT_BATCH_SIZE + # Extract cursor values from last row for pagination + last_row = rows[-1] + last_row_boundary = RowBoundary( + updated_ts=last_row[3], + name=last_row[0], + identifier=last_row[1], ) - for versions_batch in version_batches: - snapshots = self._get_snapshots_with_same_version(versions_batch) - - snapshots_by_version = defaultdict(set) - snapshots_by_dev_version = defaultdict(set) - for s in snapshots: - snapshots_by_version[(s.name, s.version)].add(s.snapshot_id) - snapshots_by_dev_version[(s.name, s.dev_version)].add(s.snapshot_id) - - expired_snapshots = [s for s in snapshots if not _is_snapshot_used(s)] - all_expired_snapshot_ids = {s.snapshot_id for s in expired_snapshots} - - cleanup_targets: t.List[t.Tuple[SnapshotId, bool]] = [] - for snapshot in expired_snapshots: - shared_version_snapshots = snapshots_by_version[(snapshot.name, snapshot.version)] - shared_version_snapshots.discard(snapshot.snapshot_id) - - shared_dev_version_snapshots = snapshots_by_dev_version[ - (snapshot.name, snapshot.dev_version) - ] - shared_dev_version_snapshots.discard(snapshot.snapshot_id) - - if not shared_dev_version_snapshots: - dev_table_only = bool(shared_version_snapshots) - cleanup_targets.append((snapshot.snapshot_id, dev_table_only)) - - snapshot_ids_to_cleanup = [snapshot_id for snapshot_id, _ in cleanup_targets] - for snapshot_id_batch in create_batches( - snapshot_ids_to_cleanup, batch_size=self.SNAPSHOT_BATCH_SIZE - ): - snapshot_id_batch_set = set(snapshot_id_batch) - full_snapshots = self._get_snapshots(snapshot_id_batch_set) - cleanup_tasks = [ + # The returned batch_range represents the actual range of rows in this batch + result_batch_range = ExpiredBatchRange( + start=batch_range.start, + end=last_row_boundary, + ) + + unique_expired_versions = unique(expired_candidates.values()) + expired_snapshot_ids: t.Set[SnapshotId] = set() + cleanup_tasks: t.List[SnapshotTableCleanupTask] = [] + + snapshots = self._get_snapshots_with_same_version(unique_expired_versions) + + snapshots_by_version = defaultdict(set) + snapshots_by_dev_version = defaultdict(set) + for s in snapshots: + snapshots_by_version[(s.name, s.version)].add(s.snapshot_id) + snapshots_by_dev_version[(s.name, s.dev_version)].add(s.snapshot_id) + + expired_snapshots = [s for s in snapshots if not _is_snapshot_used(s)] + all_expired_snapshot_ids = {s.snapshot_id for s in expired_snapshots} + + cleanup_targets: t.List[t.Tuple[SnapshotId, bool]] = [] + for snapshot in expired_snapshots: + shared_version_snapshots = snapshots_by_version[(snapshot.name, snapshot.version)] + shared_version_snapshots.discard(snapshot.snapshot_id) + + shared_dev_version_snapshots = snapshots_by_dev_version[ + (snapshot.name, snapshot.dev_version) + ] + shared_dev_version_snapshots.discard(snapshot.snapshot_id) + + if not shared_dev_version_snapshots: + dev_table_only = bool(shared_version_snapshots) + cleanup_targets.append((snapshot.snapshot_id, dev_table_only)) + + snapshot_ids_to_cleanup = [snapshot_id 
for snapshot_id, _ in cleanup_targets] + full_snapshots = self._get_snapshots(snapshot_ids_to_cleanup) + for snapshot_id, dev_table_only in cleanup_targets: + if snapshot_id in full_snapshots: + cleanup_tasks.append( SnapshotTableCleanupTask( snapshot=full_snapshots[snapshot_id].table_info, dev_table_only=dev_table_only, ) - for snapshot_id, dev_table_only in cleanup_targets - if snapshot_id in full_snapshots - ] - all_expired_snapshot_ids -= snapshot_id_batch_set - yield snapshot_id_batch_set, cleanup_tasks - - if all_expired_snapshot_ids: - # Remaining expired snapshots for which there are no tables - # to cleanup - yield all_expired_snapshot_ids, [] + ) + expired_snapshot_ids.add(snapshot_id) + all_expired_snapshot_ids.discard(snapshot_id) + + # Add any remaining expired snapshots that don't require cleanup + if all_expired_snapshot_ids: + expired_snapshot_ids.update(all_expired_snapshot_ids) + + if expired_snapshot_ids or cleanup_tasks: + return ExpiredSnapshotBatch( + expired_snapshot_ids=expired_snapshot_ids, + cleanup_tasks=cleanup_tasks, + batch_range=result_batch_range, + ) + + return None def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None: """Deletes snapshots. diff --git a/tests/core/state_sync/test_state_sync.py b/tests/core/state_sync/test_state_sync.py index 51a646ce5d..199ca43ee9 100644 --- a/tests/core/state_sync/test_state_sync.py +++ b/tests/core/state_sync/test_state_sync.py @@ -43,15 +43,33 @@ from sqlmesh.core.state_sync.base import ( SCHEMA_VERSION, SQLGLOT_VERSION, - PromotionResult, Versions, ) +from sqlmesh.core.state_sync.common import ( + ExpiredBatchRange, + LimitBoundary, + PromotionResult, + RowBoundary, +) from sqlmesh.utils.date import now_timestamp, to_datetime, to_timestamp from sqlmesh.utils.errors import SQLMeshError, StateMigrationError pytestmark = pytest.mark.slow +def _get_cleanup_tasks( + state_sync: EngineAdapterStateSync, + *, + limit: int = 1000, + ignore_ttl: bool = False, +) -> t.List[SnapshotTableCleanupTask]: + batch = state_sync.get_expired_snapshots( + ignore_ttl=ignore_ttl, + batch_range=ExpiredBatchRange.init_batch_range(batch_size=limit), + ) + return [] if batch is None else batch.cleanup_tasks + + @pytest.fixture def state_sync(duck_conn, tmp_path): state_sync = EngineAdapterStateSync( @@ -1156,15 +1174,504 @@ def test_delete_expired_snapshots(state_sync: EngineAdapterStateSync, make_snaps new_snapshot.snapshot_id, } - assert state_sync.get_expired_snapshots() == [ + assert _get_cleanup_tasks(state_sync) == [ SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=True), SnapshotTableCleanupTask(snapshot=new_snapshot.table_info, dev_table_only=False), ] - state_sync.delete_expired_snapshots() + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert not state_sync.get_snapshots(all_snapshots) +def test_get_expired_snapshot_batch(state_sync: EngineAdapterStateSync, make_snapshot: t.Callable): + now_ts = now_timestamp() + + snapshots = [] + for idx in range(3): + snapshot = make_snapshot( + SqlModel( + name=f"model_{idx}", + query=parse_one("select 1 as a, ds"), + ), + ) + snapshot.ttl = "in 10 seconds" + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot.updated_ts = now_ts - (20000 + idx * 1000) + snapshots.append(snapshot) + + state_sync.push_snapshots(snapshots) + + batch = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange.init_batch_range(batch_size=2), + ) + assert batch is not None + assert len(batch.expired_snapshot_ids) == 
2 + assert len(batch.cleanup_tasks) == 2 + + state_sync.delete_expired_snapshots( + batch_range=ExpiredBatchRange( + start=RowBoundary.lowest_boundary(), + end=batch.batch_range.end, + ), + ) + + next_batch = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch.batch_range.end, + end=LimitBoundary(batch_size=2), + ), + ) + assert next_batch is not None + assert len(next_batch.expired_snapshot_ids) == 1 + + state_sync.delete_expired_snapshots( + batch_range=ExpiredBatchRange( + start=next_batch.batch_range.start, + end=next_batch.batch_range.end, + ), + ) + + assert ( + state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange( + start=next_batch.batch_range.end, + end=LimitBoundary(batch_size=2), + ), + ) + is None + ) + + +def test_get_expired_snapshot_batch_same_timestamp( + state_sync: EngineAdapterStateSync, make_snapshot: t.Callable +): + """Test that pagination works correctly when multiple snapshots have the same updated_ts.""" + now_ts = now_timestamp() + same_timestamp = now_ts - 20000 + + snapshots = [] + for idx in range(5): + snapshot = make_snapshot( + SqlModel( + name=f"model_{idx:02d}", # Zero-padded to ensure deterministic name ordering + query=parse_one("select 1 as a, ds"), + ), + ) + snapshot.ttl = "in 10 seconds" + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + # All snapshots have the same updated_ts + snapshot.updated_ts = same_timestamp + snapshots.append(snapshot) + + state_sync.push_snapshots(snapshots) + + # Fetch first batch of 2 + batch1 = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange.init_batch_range(batch_size=2), + ) + assert batch1 is not None + assert len(batch1.expired_snapshot_ids) == 2 + assert sorted([x.name for x in batch1.expired_snapshot_ids]) == [ + '"model_00"', + '"model_01"', + ] + + # Fetch second batch of 2 using cursor from batch1 + batch2 = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch1.batch_range.end, + end=LimitBoundary(batch_size=2), + ), + ) + assert batch2 is not None + assert len(batch2.expired_snapshot_ids) == 2 + assert sorted([x.name for x in batch2.expired_snapshot_ids]) == [ + '"model_02"', + '"model_03"', + ] + + # Fetch third batch of 2 using cursor from batch2 + batch3 = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch2.batch_range.end, + end=LimitBoundary(batch_size=2), + ), + ) + assert batch3 is not None + assert sorted([x.name for x in batch3.expired_snapshot_ids]) == [ + '"model_04"', + ] + + +def test_delete_expired_snapshots_batching_with_deletion( + state_sync: EngineAdapterStateSync, make_snapshot: t.Callable +): + """Test that delete_expired_snapshots properly deletes batches as it pages through them.""" + now_ts = now_timestamp() + + # Create 5 expired snapshots with different timestamps + snapshots = [] + for idx in range(5): + snapshot = make_snapshot( + SqlModel( + name=f"model_{idx}", + query=parse_one("select 1 as a, ds"), + ), + ) + snapshot.ttl = "in 10 seconds" + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot.updated_ts = now_ts - (20000 + idx * 1000) + snapshots.append(snapshot) + + state_sync.push_snapshots(snapshots) + + # Verify all 5 snapshots exist + assert len(state_sync.get_snapshots(snapshots)) == 5 + + # Get first batch of 2 + batch1 = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange.init_batch_range(batch_size=2), + ) + assert batch1 is not None + assert len(batch1.expired_snapshot_ids) == 2 + + # Delete the first batch using 
batch_range + state_sync.delete_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch1.batch_range.start, + end=batch1.batch_range.end, + ), + ) + + # Verify the 2 oldest snapshots (model_4 and model_3) are deleted and the 3 newest remain + remaining = state_sync.get_snapshots(snapshots) + assert len(remaining) == 3 + assert snapshots[0].snapshot_id in remaining # model_0 (newest) + assert snapshots[1].snapshot_id in remaining # model_1 + assert snapshots[2].snapshot_id in remaining # model_2 + assert snapshots[3].snapshot_id not in remaining # model_3 + assert snapshots[4].snapshot_id not in remaining # model_4 (oldest) + + # Get next batch of 2 (should start after batch1's boundary) + batch2 = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch1.batch_range.end, + end=LimitBoundary(batch_size=2), + ), + ) + assert batch2 is not None + assert len(batch2.expired_snapshot_ids) == 2 + + # Delete the second batch + state_sync.delete_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch2.batch_range.start, + end=batch2.batch_range.end, + ), + ) + + # Verify only the newest snapshot remains + remaining = state_sync.get_snapshots(snapshots) + assert len(remaining) == 1 + assert snapshots[0].snapshot_id in remaining # model_0 (newest) + assert snapshots[1].snapshot_id not in remaining # model_1 + assert snapshots[2].snapshot_id not in remaining # model_2 + assert snapshots[3].snapshot_id not in remaining # model_3 + assert snapshots[4].snapshot_id not in remaining # model_4 (oldest) + + # Get final batch + batch3 = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch2.batch_range.end, + end=LimitBoundary(batch_size=2), + ), + ) + assert batch3 is not None + assert len(batch3.expired_snapshot_ids) == 1 + + # Delete the final batch + state_sync.delete_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch3.batch_range.start, + end=batch3.batch_range.end, + ), + ) + + # Verify all snapshots are deleted + assert len(state_sync.get_snapshots(snapshots)) == 0 + + # Verify no more expired snapshots exist + assert ( + state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch3.batch_range.end, + end=LimitBoundary(batch_size=2), + ), + ) + is None + ) + + +def test_iterator_expired_snapshot_batch( + state_sync: EngineAdapterStateSync, make_snapshot: t.Callable +): + """Test the iter_expired_snapshot_batches helper function.""" + from sqlmesh.core.state_sync.common import iter_expired_snapshot_batches + + now_ts = now_timestamp() + + snapshots = [] + for idx in range(5): + snapshot = make_snapshot( + SqlModel( + name=f"model_{idx}", + query=parse_one("select 1 as a, ds"), + ), + ) + snapshot.ttl = "in 10 seconds" + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot.updated_ts = now_ts - (20000 + idx * 1000) + snapshots.append(snapshot) + + state_sync.push_snapshots(snapshots) + + # Track all batches processed + batches_processed = [] + + # Process with batch size of 2 + for batch in iter_expired_snapshot_batches( + state_sync, + current_ts=now_ts, + ignore_ttl=False, + batch_size=2, + ): + batches_processed.append(batch) + + # Should have processed 3 batches (2 + 2 + 1) + assert len(batches_processed) == 3 + assert len(batches_processed[0].expired_snapshot_ids) == 2 + assert len(batches_processed[1].expired_snapshot_ids) == 2 + assert len(batches_processed[2].expired_snapshot_ids) == 1 + + # Verify all snapshots were processed + all_processed_ids = set() + for batch in 
batches_processed:
+        all_processed_ids.update(batch.expired_snapshot_ids)
+
+    expected_ids = {s.snapshot_id for s in snapshots}
+    assert all_processed_ids == expected_ids
+
+
+@pytest.mark.parametrize(
+    "start_boundary,end_boundary,expected_sql",
+    [
+        # Test with GT only (when end is LimitBoundary)
+        (
+            RowBoundary(updated_ts=0, name="", identifier=""),
+            LimitBoundary(batch_size=100),
+            "updated_ts > 0 OR (updated_ts = 0 AND name > '') OR (updated_ts = 0 AND name = '' AND identifier > '')",
+        ),
+        # Test with GT and LTE (when both are RowBoundary)
+        (
+            RowBoundary(updated_ts=1000, name="model_a", identifier="abc"),
+            RowBoundary(updated_ts=2000, name="model_z", identifier="xyz"),
+            "(updated_ts > 1000 OR (updated_ts = 1000 AND name > 'model_a') OR (updated_ts = 1000 AND name = 'model_a' AND identifier > 'abc')) AND (updated_ts < 2000 OR (updated_ts = 2000 AND name < 'model_z') OR (updated_ts = 2000 AND name = 'model_z' AND identifier <= 'xyz'))",
+        ),
+        # Test with zero timestamp
+        (
+            RowBoundary(updated_ts=0, name="", identifier=""),
+            RowBoundary(updated_ts=1234567890, name="model_x", identifier="id_123"),
+            "(updated_ts > 0 OR (updated_ts = 0 AND name > '') OR (updated_ts = 0 AND name = '' AND identifier > '')) AND (updated_ts < 1234567890 OR (updated_ts = 1234567890 AND name < 'model_x') OR (updated_ts = 1234567890 AND name = 'model_x' AND identifier <= 'id_123'))",
+        ),
+        # Test with same timestamp, different names
+        (
+            RowBoundary(updated_ts=5000, name="model_a", identifier="id_1"),
+            RowBoundary(updated_ts=5000, name="model_b", identifier="id_2"),
+            "(updated_ts > 5000 OR (updated_ts = 5000 AND name > 'model_a') OR (updated_ts = 5000 AND name = 'model_a' AND identifier > 'id_1')) AND (updated_ts < 5000 OR (updated_ts = 5000 AND name < 'model_b') OR (updated_ts = 5000 AND name = 'model_b' AND identifier <= 'id_2'))",
+        ),
+        # Test with same timestamp and name, different identifiers
+        (
+            RowBoundary(updated_ts=7000, name="model_x", identifier="id_a"),
+            RowBoundary(updated_ts=7000, name="model_x", identifier="id_b"),
+            "(updated_ts > 7000 OR (updated_ts = 7000 AND name > 'model_x') OR (updated_ts = 7000 AND name = 'model_x' AND identifier > 'id_a')) AND (updated_ts < 7000 OR (updated_ts = 7000 AND name < 'model_x') OR (updated_ts = 7000 AND name = 'model_x' AND identifier <= 'id_b'))",
+        ),
+        # Test all_batch_range use case
+        (
+            RowBoundary(updated_ts=0, name="", identifier=""),
+            RowBoundary(updated_ts=253_402_300_799_999, name="", identifier=""),
+            "(updated_ts > 0 OR (updated_ts = 0 AND name > '') OR (updated_ts = 0 AND name = '' AND identifier > '')) AND (updated_ts < 253402300799999 OR (updated_ts = 253402300799999 AND name < '') OR (updated_ts = 253402300799999 AND name = '' AND identifier <= ''))",
+        ),
+    ],
+)
+def test_expired_batch_range_where_filter(start_boundary, end_boundary, expected_sql):
+    """Test ExpiredBatchRange.where_filter generates correct SQL for various boundary combinations."""
+    batch_range = ExpiredBatchRange(start=start_boundary, end=end_boundary)
+    result = batch_range.where_filter
+    assert result.sql() == expected_sql
+
+
+def test_expired_batch_range_where_filter_with_limit():
+    """Test that where_filter correctly handles LimitBoundary (only start condition, no end condition)."""
+    batch_range = ExpiredBatchRange(
+        start=RowBoundary(updated_ts=1000, name="model_a", identifier="abc"),
+        end=LimitBoundary(batch_size=50),
+    )
+    result = batch_range.where_filter
+    # When end is LimitBoundary, should only have the start (GT) condition
+    assert (
+        result.sql()
+        == "updated_ts > 1000 OR (updated_ts = 1000 AND name > 'model_a') OR (updated_ts = 1000 AND name = 'model_a' AND identifier > 'abc')"
+    )
+
+
+def test_delete_expired_snapshots_common_function_batching(
+    state_sync: EngineAdapterStateSync, make_snapshot: t.Callable, mocker: MockerFixture
+):
+    """Test that the common delete_expired_snapshots function properly pages through batches and deletes them."""
+    from sqlmesh.core.state_sync.common import delete_expired_snapshots
+    from sqlmesh.core.state_sync.common import ExpiredBatchRange, RowBoundary, LimitBoundary
+    from unittest.mock import MagicMock
+
+    now_ts = now_timestamp()
+
+    # Create 5 expired snapshots with different timestamps
+    snapshots = []
+    for idx in range(5):
+        snapshot = make_snapshot(
+            SqlModel(
+                name=f"model_{idx}",
+                query=parse_one("select 1 as a, ds"),
+            ),
+        )
+        snapshot.ttl = "in 10 seconds"
+        snapshot.categorize_as(SnapshotChangeCategory.BREAKING)
+        snapshot.updated_ts = now_ts - (20000 + idx * 1000)
+        snapshots.append(snapshot)
+
+    state_sync.push_snapshots(snapshots)
+
+    # Spy on get_expired_snapshots and delete_expired_snapshots methods
+    get_expired_spy = mocker.spy(state_sync, "get_expired_snapshots")
+    delete_expired_spy = mocker.spy(state_sync, "delete_expired_snapshots")
+
+    # Mock snapshot evaluator
+    mock_evaluator = MagicMock()
+    mock_evaluator.cleanup = MagicMock()
+
+    # Run delete_expired_snapshots with batch_size=2
+    delete_expired_snapshots(
+        state_sync,
+        mock_evaluator,
+        current_ts=now_ts,
+        batch_size=2,
+    )
+
+    # Verify get_expired_snapshots was called the correct number of times:
+    # - 3 batches (2+2+1): each batch triggers 2 calls (one from iter_expired_snapshot_batches, one from delete_expired_snapshots)
+    # - Plus 1 final call that returns empty to exit the loop
+    # Total: 3 * 2 + 1 = 7 calls
+    assert get_expired_spy.call_count == 7
+
+    # Verify the progression of batch_range calls from the iter_expired_snapshot_batches loop
+    # (calls at indices 0, 2, 4, 6 are from iter_expired_snapshot_batches)
+    # (calls at indices 1, 3, 5 are from delete_expired_snapshots in facade.py)
+    calls = get_expired_spy.call_args_list
+
+    # First call from iterator should have a batch_range starting from the beginning
+    first_call_kwargs = calls[0][1]
+    assert "batch_range" in first_call_kwargs
+    first_range = first_call_kwargs["batch_range"]
+    assert isinstance(first_range, ExpiredBatchRange)
+    assert isinstance(first_range.start, RowBoundary)
+    assert isinstance(first_range.end, LimitBoundary)
+    assert first_range.end.batch_size == 2
+    assert first_range.start.updated_ts == 0
+    assert first_range.start.name == ""
+    assert first_range.start.identifier == ""
+
+    # Third call (second batch from iterator) should have a batch_range from the first batch's range
+    third_call_kwargs = calls[2][1]
+    assert "batch_range" in third_call_kwargs
+    second_range = third_call_kwargs["batch_range"]
+    assert isinstance(second_range, ExpiredBatchRange)
+    assert isinstance(second_range.start, RowBoundary)
+    assert isinstance(second_range.end, LimitBoundary)
+    assert second_range.end.batch_size == 2
+    # Should have progressed from the first batch
+    assert second_range.start.updated_ts > 0
+    assert second_range.start.name == '"model_3"'
+
+    # Fifth call (third batch from iterator) should have a batch_range from the second batch's range
+    fifth_call_kwargs = calls[4][1]
+    assert "batch_range" in fifth_call_kwargs
+    third_range = fifth_call_kwargs["batch_range"]
+    assert isinstance(third_range, ExpiredBatchRange)
+    assert isinstance(third_range.start, RowBoundary)
+    assert isinstance(third_range.end, LimitBoundary)
+    assert third_range.end.batch_size == 2
+    # Should have progressed from the second batch
+    assert third_range.start.updated_ts >= second_range.start.updated_ts
+    assert third_range.start.name == '"model_1"'
+
+    # Seventh call (final call from iterator) should have a batch_range from the third batch's range
+    seventh_call_kwargs = calls[6][1]
+    assert "batch_range" in seventh_call_kwargs
+    fourth_range = seventh_call_kwargs["batch_range"]
+    assert isinstance(fourth_range, ExpiredBatchRange)
+    assert isinstance(fourth_range.start, RowBoundary)
+    assert isinstance(fourth_range.end, LimitBoundary)
+    assert fourth_range.end.batch_size == 2
+    # Should have progressed from the third batch
+    assert fourth_range.start.updated_ts >= third_range.start.updated_ts
+    assert fourth_range.start.name == '"model_0"'
+
+    # Verify delete_expired_snapshots was called 3 times (once per batch)
+    assert delete_expired_spy.call_count == 3
+
+    # Verify each delete call used a batch_range
+    delete_calls = delete_expired_spy.call_args_list
+
+    # First call should have a batch_range matching the first batch
+    first_delete_kwargs = delete_calls[0][1]
+    assert "batch_range" in first_delete_kwargs
+    first_delete_range = first_delete_kwargs["batch_range"]
+    assert isinstance(first_delete_range, ExpiredBatchRange)
+    assert isinstance(first_delete_range.start, RowBoundary)
+    assert first_delete_range.start.updated_ts == 0
+    assert isinstance(first_delete_range.end, RowBoundary)
+    assert first_delete_range.end.updated_ts == second_range.start.updated_ts
+    assert first_delete_range.end.name == second_range.start.name
+    assert first_delete_range.end.identifier == second_range.start.identifier
+
+    second_delete_kwargs = delete_calls[1][1]
+    assert "batch_range" in second_delete_kwargs
+    second_delete_range = second_delete_kwargs["batch_range"]
+    assert isinstance(second_delete_range, ExpiredBatchRange)
+    assert isinstance(second_delete_range.start, RowBoundary)
+    assert second_delete_range.start.updated_ts == 0
+    assert isinstance(second_delete_range.end, RowBoundary)
+    assert second_delete_range.end.updated_ts == third_range.start.updated_ts
+    assert second_delete_range.end.name == third_range.start.name
+    assert second_delete_range.end.identifier == third_range.start.identifier
+
+    third_delete_kwargs = delete_calls[2][1]
+    assert "batch_range" in third_delete_kwargs
+    third_delete_range = third_delete_kwargs["batch_range"]
+    assert isinstance(third_delete_range, ExpiredBatchRange)
+    assert isinstance(third_delete_range.start, RowBoundary)
+    assert third_delete_range.start.updated_ts == 0
+    assert isinstance(third_delete_range.end, RowBoundary)
+    assert third_delete_range.end.updated_ts == fourth_range.start.updated_ts
+    assert third_delete_range.end.name == fourth_range.start.name
+    assert third_delete_range.end.identifier == fourth_range.start.identifier
+    # Verify the cleanup method was called for each batch that had cleanup tasks
+    assert mock_evaluator.cleanup.call_count >= 1
+
+    # Verify all snapshots were deleted in the end
+    remaining = state_sync.get_snapshots(snapshots)
+    assert len(remaining) == 0
+
+
 def test_delete_expired_snapshots_seed(
     state_sync: EngineAdapterStateSync, make_snapshot: t.Callable
 ):
@@ -1187,10 +1694,10 @@ def test_delete_expired_snapshots_seed(
     state_sync.push_snapshots(all_snapshots)
     assert set(state_sync.get_snapshots(all_snapshots)) == {snapshot.snapshot_id}
 
-    assert state_sync.get_expired_snapshots() == [
+    assert _get_cleanup_tasks(state_sync) == [
         SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=False),
     ]
-    state_sync.delete_expired_snapshots()
+    state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range())
 
     assert not state_sync.get_snapshots(all_snapshots)
 
@@ -1228,11 +1735,11 @@ def test_delete_expired_snapshots_batching(
         snapshot_b.snapshot_id,
     }
 
-    assert state_sync.get_expired_snapshots() == [
+    assert _get_cleanup_tasks(state_sync) == [
         SnapshotTableCleanupTask(snapshot=snapshot_a.table_info, dev_table_only=False),
         SnapshotTableCleanupTask(snapshot=snapshot_b.table_info, dev_table_only=False),
     ]
-    state_sync.delete_expired_snapshots()
+    state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range())
 
     assert not state_sync.get_snapshots(all_snapshots)
 
@@ -1265,8 +1772,8 @@ def test_delete_expired_snapshots_promoted(
     state_sync.promote(env)
 
     all_snapshots = [snapshot]
-    assert not state_sync.get_expired_snapshots()
-    state_sync.delete_expired_snapshots()
+    assert not _get_cleanup_tasks(state_sync)
+    state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range())
     assert set(state_sync.get_snapshots(all_snapshots)) == {snapshot.snapshot_id}
 
     env.snapshots_ = []
@@ -1275,10 +1782,10 @@ def test_delete_expired_snapshots_promoted(
     now_timestamp_mock = mocker.patch("sqlmesh.core.state_sync.db.facade.now_timestamp")
     now_timestamp_mock.return_value = now_timestamp() + 11000
 
-    assert state_sync.get_expired_snapshots() == [
+    assert _get_cleanup_tasks(state_sync) == [
         SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=False)
     ]
-    state_sync.delete_expired_snapshots()
+    state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range())
 
     assert not state_sync.get_snapshots(all_snapshots)
 
@@ -1315,10 +1822,10 @@ def test_delete_expired_snapshots_dev_table_cleanup_only(
         new_snapshot.snapshot_id,
     }
 
-    assert state_sync.get_expired_snapshots() == [
+    assert _get_cleanup_tasks(state_sync) == [
        SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=True)
    ]
-    state_sync.delete_expired_snapshots()
+    state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range())
 
     assert set(state_sync.get_snapshots(all_snapshots)) == {new_snapshot.snapshot_id}
 
@@ -1357,8 +1864,8 @@ def test_delete_expired_snapshots_shared_dev_table(
         new_snapshot.snapshot_id,
    }
 
-    assert not state_sync.get_expired_snapshots()  # No dev table cleanup
-    state_sync.delete_expired_snapshots()
+    assert not _get_cleanup_tasks(state_sync)  # No dev table cleanup
+    state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range())
 
     assert set(state_sync.get_snapshots(all_snapshots)) == {new_snapshot.snapshot_id}
 
@@ -1403,16 +1910,18 @@ def test_delete_expired_snapshots_ignore_ttl(
     state_sync.promote(env)
 
     # default TTL = 1 week, nothing to clean up yet if we take TTL into account
-    assert not state_sync.get_expired_snapshots()
-    state_sync.delete_expired_snapshots()
+    assert not _get_cleanup_tasks(state_sync)
+    state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range())
     assert state_sync.snapshots_exist([snapshot_c.snapshot_id]) == {snapshot_c.snapshot_id}
 
     # If we ignore TTL, only snapshot_c should get cleaned up because snapshot_a and snapshot_b are part of an environment
     assert snapshot_a.table_info != snapshot_b.table_info != snapshot_c.table_info
-    assert state_sync.get_expired_snapshots(ignore_ttl=True) == [
+    assert _get_cleanup_tasks(state_sync, ignore_ttl=True) == [
         SnapshotTableCleanupTask(snapshot=snapshot_c.table_info, dev_table_only=False)
     ]
-    state_sync.delete_expired_snapshots(ignore_ttl=True)
+    state_sync.delete_expired_snapshots(
+        batch_range=ExpiredBatchRange.all_batch_range(), ignore_ttl=True
+    )
 
     assert not state_sync.snapshots_exist([snapshot_c.snapshot_id])
 
@@ -1476,11 +1985,11 @@ def test_delete_expired_snapshots_cleanup_intervals(
     ]
     assert not stored_new_snapshot.dev_intervals
 
-    assert state_sync.get_expired_snapshots() == [
+    assert _get_cleanup_tasks(state_sync) == [
         SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=True),
         SnapshotTableCleanupTask(snapshot=new_snapshot.table_info, dev_table_only=False),
     ]
-    state_sync.delete_expired_snapshots()
+    state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range())
 
     assert not get_snapshot_intervals(snapshot)
 
@@ -1564,10 +2073,10 @@ def test_delete_expired_snapshots_cleanup_intervals_shared_version(
     )
 
     # Delete the expired snapshot
-    assert state_sync.get_expired_snapshots() == [
+    assert _get_cleanup_tasks(state_sync) == [
        SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=True),
     ]
-    state_sync.delete_expired_snapshots()
+    state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range())
     assert not state_sync.get_snapshots([snapshot])
 
     # Check new snapshot's intervals
@@ -1684,8 +2193,8 @@ def test_delete_expired_snapshots_cleanup_intervals_shared_dev_version(
     )
 
     # Delete the expired snapshot
-    assert state_sync.get_expired_snapshots() == []
-    state_sync.delete_expired_snapshots()
+    assert not _get_cleanup_tasks(state_sync)
+    state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range())
     assert not state_sync.get_snapshots([snapshot])
 
     # Check new snapshot's intervals
@@ -1778,10 +2287,10 @@ def test_compact_intervals_after_cleanup(
     state_sync.add_interval(snapshot_c, "2023-01-07", "2023-01-09", is_dev=True)
 
     # Only the dev table of the original snapshot should be deleted
-    assert state_sync.get_expired_snapshots() == [
+    assert _get_cleanup_tasks(state_sync) == [
         SnapshotTableCleanupTask(snapshot=snapshot_a.table_info, dev_table_only=True),
    ]
-    state_sync.delete_expired_snapshots()
+    state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range())
 
     assert state_sync.engine_adapter.fetchone("SELECT COUNT(*) FROM sqlmesh._intervals")[0] == 5  # type: ignore
diff --git a/tests/core/test_context.py b/tests/core/test_context.py
index 6270cec56a..60ea3fd451 100644
--- a/tests/core/test_context.py
+++ b/tests/core/test_context.py
@@ -1030,7 +1030,7 @@ def test_janitor(sushi_context, mocker: MockerFixture) -> None:
     sushi_context._engine_adapter = adapter_mock
     sushi_context.engine_adapters = {sushi_context.config.default_gateway: adapter_mock}
     sushi_context._state_sync = state_sync_mock
-    state_sync_mock.get_expired_snapshots.return_value = []
+    state_sync_mock.get_expired_snapshots.return_value = None
     sushi_context._run_janitor()
 
     # Assert that the schemas are dropped just twice for the schema based environment

From 5edf5389749bf5489d7205db1f32e16990d343ab Mon Sep 17 00:00:00 2001
From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com>
Date: Wed, 8 Oct 2025 09:49:24 -0700
Subject: [PATCH 066/173] fix: remove state sync from migrate (#5502)

---
 sqlmesh/core/state_sync/db/facade.py             |  2 +-
 sqlmesh/core/state_sync/db/migrator.py           | 11 +++++------
 sqlmesh/migrations/v0000_baseline.py             |  9 +++------
 .../migrations/v0061_mysql_fix_blob_text_type.py |  7 ++-----
 sqlmesh/migrations/v0062_add_model_gateway.py    |  4 ++--
 sqlmesh/migrations/v0063_change_signals.py       |  6 ++----
 .../v0064_join_when_matched_strings.py           |  6 ++----
 sqlmesh/migrations/v0065_add_model_optimize.py   |  4 ++--
 .../migrations/v0066_add_auto_restatements.py    |  8 ++------
 .../v0067_add_tsql_date_full_precision.py        |  4 ++--
 ..._include_unrendered_query_in_metadata_hash.py |  4 ++--
 .../migrations/v0069_update_dev_table_suffix.py  |  6 ++----
 .../v0070_include_grains_in_metadata_hash.py     |  4 ++--
 .../v0071_add_dev_version_to_intervals.py        |  8 ++------
 .../v0072_add_environment_statements.py          |  6 ++----
 .../v0073_remove_symbolic_disable_restatement.py |  6 ++----
 ...0074_add_partition_by_time_column_property.py |  4 ++--
 .../migrations/v0075_remove_validate_query.py    |  6 ++----
 sqlmesh/migrations/v0076_add_cron_tz.py          |  4 ++--
 .../v0077_fix_column_type_hash_calculation.py    |  4 ++--
 .../v0078_warn_if_non_migratable_python_env.py   |  6 ++----
 .../v0079_add_gateway_managed_property.py        | 16 +++++++---------
 .../v0080_add_batch_size_to_scd_type_2_models.py |  4 ++--
 .../migrations/v0081_update_partitioned_by.py    |  6 ++----
 ..._warn_if_incorrectly_duplicated_statements.py |  6 ++----
 ...3_use_sql_for_scd_time_data_type_data_hash.py |  4 ++--
 ...malize_quote_when_matched_and_merge_filter.py |  4 ++--
 sqlmesh/migrations/v0085_deterministic_repr.py   |  6 ++----
 .../migrations/v0086_check_deterministic_bug.py  |  6 ++----
 .../v0087_normalize_blueprint_variables.py       |  6 ++----
 ...v0088_warn_about_variable_python_env_diffs.py |  6 ++----
 .../v0089_add_virtual_environment_mode.py        |  4 ++--
 .../migrations/v0090_add_forward_only_column.py  |  8 ++------
 sqlmesh/migrations/v0091_on_additive_change.py   |  4 ++--
 .../v0092_warn_about_dbt_data_type_diff.py       |  6 ++----
 .../v0093_use_raw_sql_in_fingerprint.py          |  4 ++--
 ...94_add_dev_version_and_fingerprint_columns.py |  8 ++------
 .../v0095_warn_about_dbt_raw_sql_diff.py         |  6 ++----
 .../migrations/v0096_remove_plan_dags_table.py   |  6 ++----
 sqlmesh/migrations/v0097_add_dbt_name_in_node.py |  4 ++--
 .../v0098_add_dbt_node_info_in_node.py           |  6 ++----
 .../v0099_add_last_altered_to_intervals.py       |  6 ++----
 .../v0100_add_grants_and_grants_target_layer.py  |  4 ++--
 43 files changed, 94 insertions(+), 155 deletions(-)

diff --git a/sqlmesh/core/state_sync/db/facade.py b/sqlmesh/core/state_sync/db/facade.py
index 49f7b5b92f..64042624f3 100644
--- a/sqlmesh/core/state_sync/db/facade.py
+++ b/sqlmesh/core/state_sync/db/facade.py
@@ -469,7 +469,7 @@ def migrate(
     ) -> None:
         """Migrate the state sync to the latest SQLMesh / SQLGlot version."""
         self.migrator.migrate(
-            self,
+            self.schema,
             skip_backup=skip_backup,
             promoted_snapshots_only=promoted_snapshots_only,
         )
diff --git a/sqlmesh/core/state_sync/db/migrator.py b/sqlmesh/core/state_sync/db/migrator.py
index 3e3f978b96..ad60c57570 100644
--- a/sqlmesh/core/state_sync/db/migrator.py
+++ b/sqlmesh/core/state_sync/db/migrator.py
@@ -30,7 +30,6 @@
     MIN_SCHEMA_VERSION,
     MIN_SQLMESH_VERSION,
 )
-from sqlmesh.core.state_sync.base import StateSync
 from sqlmesh.core.state_sync.db.environment import EnvironmentState
 from sqlmesh.core.state_sync.db.interval import IntervalState
 from sqlmesh.core.state_sync.db.snapshot import SnapshotState
@@ -85,7 +84,7 @@ def __init__(
 
     def migrate(
         self,
-        state_sync: StateSync,
+        schema: t.Optional[str],
         skip_backup: bool = False,
         promoted_snapshots_only: bool = True,
     ) -> None:
@@ -94,7 +93,7 @@ def migrate(
         migration_start_ts = time.perf_counter()
 
         try:
-            migrate_rows = self._apply_migrations(state_sync, skip_backup)
+            migrate_rows = self._apply_migrations(schema, skip_backup)
             if not migrate_rows and major_minor(SQLMESH_VERSION) == versions.minor_sqlmesh_version:
                 return
@@ -153,7 +152,7 @@ def rollback(self) -> None:
 
     def _apply_migrations(
         self,
-        state_sync: StateSync,
+        schema: t.Optional[str],
         skip_backup: bool,
     ) -> bool:
         versions = self.version_state.get_versions()
@@ -184,10 +183,10 @@ def _apply_migrations(
 
         for migration in migrations:
             logger.info(f"Applying migration {migration}")
-            migration.migrate_schemas(state_sync)
+            migration.migrate_schemas(engine_adapter=self.engine_adapter, schema=schema)
             if state_table_exist:
                 # No need to run DML for the initial migration since all tables are empty
-                migration.migrate_rows(state_sync)
+                migration.migrate_rows(engine_adapter=self.engine_adapter, schema=schema)
 
         snapshot_count_after = self.snapshot_state.count()
diff --git a/sqlmesh/migrations/v0000_baseline.py b/sqlmesh/migrations/v0000_baseline.py
index 4891900a76..abd316fcfe 100644
--- a/sqlmesh/migrations/v0000_baseline.py
+++ b/sqlmesh/migrations/v0000_baseline.py
@@ -4,15 +4,12 @@
 from sqlmesh.utils.migration import blob_text_type, index_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
-    schema = state_sync.schema
-    engine_adapter = state_sync.engine_adapter
-
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     intervals_table = "_intervals"
     snapshots_table = "_snapshots"
     environments_table = "_environments"
     versions_table = "_versions"
-    if state_sync.schema:
+    if schema:
         engine_adapter.create_schema(schema)
         intervals_table = f"{schema}.{intervals_table}"
         snapshots_table = f"{schema}.{snapshots_table}"
@@ -94,5 +91,5 @@ def migrate_schemas(state_sync, **kwargs):  # type: ignore
     engine_adapter.create_index(intervals_table, "_intervals_name_version_idx", ("name", "version"))
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py b/sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py
index 34b765b3ad..897974f09a 100644
--- a/sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py
+++ b/sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py
@@ -9,12 +9,9 @@
 from sqlmesh.utils.migration import blob_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     if engine_adapter.dialect != "mysql":
         return
-
-    schema = state_sync.schema
     environments_table = "_environments"
     snapshots_table = "_snapshots"
@@ -46,5 +43,5 @@ def migrate_schemas(state_sync, **kwargs):  # type: ignore
         engine_adapter.execute(alter_table_exp)
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0062_add_model_gateway.py b/sqlmesh/migrations/v0062_add_model_gateway.py
index 524a94044a..f65d8224ec 100644
--- a/sqlmesh/migrations/v0062_add_model_gateway.py
+++ b/sqlmesh/migrations/v0062_add_model_gateway.py
@@ -1,9 +1,9 @@
 """Add the gateway model attribute."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0063_change_signals.py b/sqlmesh/migrations/v0063_change_signals.py
index 8806c9ea60..bbced547fd 100644
--- a/sqlmesh/migrations/v0063_change_signals.py
+++ b/sqlmesh/migrations/v0063_change_signals.py
@@ -7,15 +7,13 @@
 from sqlmesh.utils.migration import index_text_type, blob_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     import pandas as pd
 
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
     snapshots_table = "_snapshots"
     index_type = index_text_type(engine_adapter.dialect)
     if schema:
diff --git a/sqlmesh/migrations/v0064_join_when_matched_strings.py b/sqlmesh/migrations/v0064_join_when_matched_strings.py
index 6da3164a38..ffd4c94913 100644
--- a/sqlmesh/migrations/v0064_join_when_matched_strings.py
+++ b/sqlmesh/migrations/v0064_join_when_matched_strings.py
@@ -7,15 +7,13 @@
 from sqlmesh.utils.migration import index_text_type, blob_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     import pandas as pd
 
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
     snapshots_table = "_snapshots"
     index_type = index_text_type(engine_adapter.dialect)
     if schema:
diff --git a/sqlmesh/migrations/v0065_add_model_optimize.py b/sqlmesh/migrations/v0065_add_model_optimize.py
index 09240aa61e..e9bc646666 100644
--- a/sqlmesh/migrations/v0065_add_model_optimize.py
+++ b/sqlmesh/migrations/v0065_add_model_optimize.py
@@ -1,9 +1,9 @@
 """Add the optimize_query model attribute."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0066_add_auto_restatements.py b/sqlmesh/migrations/v0066_add_auto_restatements.py
index 96d2cd45e8..9eea773573 100644
--- a/sqlmesh/migrations/v0066_add_auto_restatements.py
+++ b/sqlmesh/migrations/v0066_add_auto_restatements.py
@@ -5,9 +5,7 @@
 from sqlmesh.utils.migration import index_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     auto_restatements_table = "_auto_restatements"
     intervals_table = "_intervals"
@@ -40,9 +38,7 @@ def migrate_schemas(state_sync, **kwargs):  # type: ignore
     engine_adapter.execute(alter_table_exp)
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     intervals_table = "_intervals"
 
     if schema:
diff --git a/sqlmesh/migrations/v0067_add_tsql_date_full_precision.py b/sqlmesh/migrations/v0067_add_tsql_date_full_precision.py
index d4fd93eda4..1243118df0 100644
--- a/sqlmesh/migrations/v0067_add_tsql_date_full_precision.py
+++ b/sqlmesh/migrations/v0067_add_tsql_date_full_precision.py
@@ -1,9 +1,9 @@
 """Add full precision for tsql to support nanoseconds."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py b/sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py
index 6f7ddbdc1c..35142e9aeb 100644
--- a/sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py
+++ b/sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py
@@ -1,9 +1,9 @@
 """Include the unrendered query in the metadata hash."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0069_update_dev_table_suffix.py b/sqlmesh/migrations/v0069_update_dev_table_suffix.py
index 57b41a816c..f69aac434e 100644
--- a/sqlmesh/migrations/v0069_update_dev_table_suffix.py
+++ b/sqlmesh/migrations/v0069_update_dev_table_suffix.py
@@ -7,15 +7,13 @@
 from sqlmesh.utils.migration import index_text_type, blob_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     import pandas as pd
 
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
     snapshots_table = "_snapshots"
     environments_table = "_environments"
     if schema:
diff --git a/sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py b/sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py
index 4b339d8e97..d0dbdd5563 100644
--- a/sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py
+++ b/sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py
@@ -1,9 +1,9 @@
 """Include grains in the metadata hash."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0071_add_dev_version_to_intervals.py b/sqlmesh/migrations/v0071_add_dev_version_to_intervals.py
index 4e6cbab4f0..61a49dc0b9 100644
--- a/sqlmesh/migrations/v0071_add_dev_version_to_intervals.py
+++ b/sqlmesh/migrations/v0071_add_dev_version_to_intervals.py
@@ -8,9 +8,7 @@
 from sqlmesh.utils.migration import index_text_type, blob_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     intervals_table = "_intervals"
     if schema:
         intervals_table = f"{schema}.{intervals_table}"
@@ -29,9 +27,7 @@ def migrate_schemas(state_sync, **kwargs):  # type: ignore
     engine_adapter.execute(alter_table_exp)
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     intervals_table = "_intervals"
     snapshots_table = "_snapshots"
     if schema:
diff --git a/sqlmesh/migrations/v0072_add_environment_statements.py b/sqlmesh/migrations/v0072_add_environment_statements.py
index e73faf2b9a..4ed52b5c47 100644
--- a/sqlmesh/migrations/v0072_add_environment_statements.py
+++ b/sqlmesh/migrations/v0072_add_environment_statements.py
@@ -5,9 +5,7 @@
 from sqlmesh.utils.migration import blob_text_type, index_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     environment_statements_table = "_environment_statements"
 
     if schema:
@@ -27,5 +25,5 @@ def migrate_schemas(state_sync, **kwargs):  # type: ignore
     )
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py b/sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py
index 40e74d6426..708693ed61 100644
--- a/sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py
+++ b/sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py
@@ -6,15 +6,13 @@
 from sqlmesh.utils.migration import index_text_type, blob_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     import pandas as pd
 
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
diff --git a/sqlmesh/migrations/v0074_add_partition_by_time_column_property.py b/sqlmesh/migrations/v0074_add_partition_by_time_column_property.py
index 04f1a27254..acd349c888 100644
--- a/sqlmesh/migrations/v0074_add_partition_by_time_column_property.py
+++ b/sqlmesh/migrations/v0074_add_partition_by_time_column_property.py
@@ -2,9 +2,9 @@
 (default: True to keep the original behaviour)"""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0075_remove_validate_query.py b/sqlmesh/migrations/v0075_remove_validate_query.py
index f6d4e255d9..9fdcca7ea6 100644
--- a/sqlmesh/migrations/v0075_remove_validate_query.py
+++ b/sqlmesh/migrations/v0075_remove_validate_query.py
@@ -8,15 +8,13 @@
 from sqlmesh.utils.migration import blob_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     import pandas as pd
 
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
     snapshots_table = "_snapshots"
     index_type = index_text_type(engine_adapter.dialect)
     if schema:
diff --git a/sqlmesh/migrations/v0076_add_cron_tz.py b/sqlmesh/migrations/v0076_add_cron_tz.py
index 300474aa18..909017c8cd 100644
--- a/sqlmesh/migrations/v0076_add_cron_tz.py
+++ b/sqlmesh/migrations/v0076_add_cron_tz.py
@@ -1,9 +1,9 @@
 """Add 'cron_tz' property to node definition."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py b/sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py
index 2aec1140f1..68953836bd 100644
--- a/sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py
+++ b/sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py
@@ -1,9 +1,9 @@
 """Use the model's dialect when calculating the hash for the column types."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py b/sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py
index c24b6a5168..adf1e96dd0 100644
--- a/sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py
+++ b/sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py
@@ -24,13 +24,11 @@
 from sqlmesh.core.console import get_console
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
diff --git a/sqlmesh/migrations/v0079_add_gateway_managed_property.py b/sqlmesh/migrations/v0079_add_gateway_managed_property.py
index 8d24601102..7650d6d765 100644
--- a/sqlmesh/migrations/v0079_add_gateway_managed_property.py
+++ b/sqlmesh/migrations/v0079_add_gateway_managed_property.py
@@ -3,11 +3,10 @@
 from sqlglot import exp
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     environments_table = "_environments"
-    if state_sync.schema:
-        environments_table = f"{state_sync.schema}.{environments_table}"
+    if schema:
+        environments_table = f"{schema}.{environments_table}"
 
     alter_table_exp = exp.Alter(
         this=exp.to_table(environments_table),
@@ -22,13 +21,12 @@ def migrate_schemas(state_sync, **kwargs):  # type: ignore
     engine_adapter.execute(alter_table_exp)
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     environments_table = "_environments"
-    if state_sync.schema:
-        environments_table = f"{state_sync.schema}.{environments_table}"
+    if schema:
+        environments_table = f"{schema}.{environments_table}"
 
-    state_sync.engine_adapter.update_table(
+    engine_adapter.update_table(
         environments_table,
         {"gateway_managed": False},
         where=exp.true(),
diff --git a/sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py b/sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py
index 582bdd3da9..35cb3977cc 100644
--- a/sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py
+++ b/sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py
@@ -1,9 +1,9 @@
 """Add batch_size to SCD Type 2 models and add updated_at_name to by time which changes their data hash."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0081_update_partitioned_by.py b/sqlmesh/migrations/v0081_update_partitioned_by.py
index 611d8f6973..8740285bf0 100644
--- a/sqlmesh/migrations/v0081_update_partitioned_by.py
+++ b/sqlmesh/migrations/v0081_update_partitioned_by.py
@@ -8,15 +8,13 @@
 from sqlmesh.utils.migration import blob_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     import pandas as pd
 
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
     snapshots_table = "_snapshots"
     index_type = index_text_type(engine_adapter.dialect)
     if schema:
diff --git a/sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py b/sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py
index 6eadbfc2c3..5565b099cd 100644
--- a/sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py
+++ b/sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py
@@ -34,13 +34,11 @@
 from sqlmesh.core.console import get_console
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
diff --git a/sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py b/sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py
index 38c84afafd..5dbe0847f9 100644
--- a/sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py
+++ b/sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py
@@ -1,9 +1,9 @@
 """Use sql(...) instead of gen when computing the data hash of the time data type."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py b/sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py
index 5401c97d77..9edb0051ba 100644
--- a/sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py
+++ b/sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py
@@ -5,9 +5,9 @@
 """
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0085_deterministic_repr.py b/sqlmesh/migrations/v0085_deterministic_repr.py
index 1a90277bbe..81cb0f194e 100644
--- a/sqlmesh/migrations/v0085_deterministic_repr.py
+++ b/sqlmesh/migrations/v0085_deterministic_repr.py
@@ -36,15 +36,13 @@ def _dict_sort(obj: t.Any) -> str:
     return repr(obj)
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     import pandas as pd
 
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
diff --git a/sqlmesh/migrations/v0086_check_deterministic_bug.py b/sqlmesh/migrations/v0086_check_deterministic_bug.py
index 0679414881..f44e5b8e33 100644
--- a/sqlmesh/migrations/v0086_check_deterministic_bug.py
+++ b/sqlmesh/migrations/v0086_check_deterministic_bug.py
@@ -10,13 +10,11 @@
 KEYS_TO_MAKE_DETERMINISTIC = ["__sqlmesh__vars__", "__sqlmesh__blueprint__vars__"]
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     snapshots_table = "_snapshots"
     versions_table = "_versions"
     if schema:
diff --git a/sqlmesh/migrations/v0087_normalize_blueprint_variables.py b/sqlmesh/migrations/v0087_normalize_blueprint_variables.py
index 2f23a0653e..fe737861c2 100644
--- a/sqlmesh/migrations/v0087_normalize_blueprint_variables.py
+++ b/sqlmesh/migrations/v0087_normalize_blueprint_variables.py
@@ -35,15 +35,13 @@ class SqlValue:
     sql: str
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     import pandas as pd
 
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
diff --git a/sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py b/sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py
index 405aad725f..0aa7171821 100644
--- a/sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py
+++ b/sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py
@@ -35,13 +35,11 @@
 METADATA_HASH_EXPRESSIONS = {"on_virtual_update", "audits", "signals", "audit_definitions"}
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
diff --git a/sqlmesh/migrations/v0089_add_virtual_environment_mode.py b/sqlmesh/migrations/v0089_add_virtual_environment_mode.py
index 63d491418f..88126c76d7 100644
--- a/sqlmesh/migrations/v0089_add_virtual_environment_mode.py
+++ b/sqlmesh/migrations/v0089_add_virtual_environment_mode.py
@@ -1,9 +1,9 @@
 """Add virtual_environment_mode to the model definition."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0090_add_forward_only_column.py b/sqlmesh/migrations/v0090_add_forward_only_column.py
index b68c0f65ea..48253691ec 100644
--- a/sqlmesh/migrations/v0090_add_forward_only_column.py
+++ b/sqlmesh/migrations/v0090_add_forward_only_column.py
@@ -7,9 +7,7 @@
 from sqlmesh.utils.migration import index_text_type, blob_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
@@ -27,11 +25,9 @@ def migrate_schemas(state_sync, **kwargs):  # type: ignore
     engine_adapter.execute(alter_table_exp)
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     import pandas as pd
 
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
diff --git a/sqlmesh/migrations/v0091_on_additive_change.py b/sqlmesh/migrations/v0091_on_additive_change.py
index c0170bd438..e24b9b4122 100644
--- a/sqlmesh/migrations/v0091_on_additive_change.py
+++ b/sqlmesh/migrations/v0091_on_additive_change.py
@@ -1,9 +1,9 @@
 """Add on_additive_change to incremental model metadata hash."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py b/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py
index 1ff069bc82..02e2a5f4c1 100644
--- a/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py
+++ b/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py
@@ -17,13 +17,11 @@
 SQLMESH_DBT_PACKAGE = "sqlmesh.dbt"
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
diff --git a/sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py b/sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py
index f629c1d27d..aaaacf3a91 100644
--- a/sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py
+++ b/sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py
@@ -1,9 +1,9 @@
 """Use the raw SQL when computing the model fingerprint."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py b/sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py
index 1abc4fa4af..9d7adf21a3 100644
--- a/sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py
+++ b/sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py
@@ -7,9 +7,7 @@
 from sqlmesh.utils.migration import index_text_type, blob_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
@@ -42,11 +40,9 @@ def migrate_schemas(state_sync, **kwargs):  # type: ignore
     engine_adapter.execute(add_fingerprint_exp)
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     import pandas as pd
 
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
diff --git a/sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py b/sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py
index 802d996df5..0fa9fd51b8 100644
--- a/sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py
+++ b/sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py
@@ -17,13 +17,11 @@
 SQLMESH_DBT_PACKAGE = "sqlmesh.dbt"
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
diff --git a/sqlmesh/migrations/v0096_remove_plan_dags_table.py b/sqlmesh/migrations/v0096_remove_plan_dags_table.py
index e342d6b1a8..8eb674ead0 100644
--- a/sqlmesh/migrations/v0096_remove_plan_dags_table.py
+++ b/sqlmesh/migrations/v0096_remove_plan_dags_table.py
@@ -1,9 +1,7 @@
 """Remove the obsolete _plan_dags table."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     plan_dags_table = "_plan_dags"
     if schema:
         plan_dags_table = f"{schema}.{plan_dags_table}"
@@ -11,5 +9,5 @@ def migrate_schemas(state_sync, **kwargs):  # type: ignore
     engine_adapter.drop_table(plan_dags_table)
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0097_add_dbt_name_in_node.py b/sqlmesh/migrations/v0097_add_dbt_name_in_node.py
index f8909e4430..cd548977ef 100644
--- a/sqlmesh/migrations/v0097_add_dbt_name_in_node.py
+++ b/sqlmesh/migrations/v0097_add_dbt_name_in_node.py
@@ -1,9 +1,9 @@
 """Add 'dbt_name' property to node definition."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py b/sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py
index c8acd0bafd..b69ba8fa6f 100644
--- a/sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py
+++ b/sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py
@@ -5,15 +5,13 @@
 from sqlmesh.utils.migration import index_text_type, blob_text_type
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     import pandas as pd
 
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
     snapshots_table = "_snapshots"
     if schema:
         snapshots_table = f"{schema}.{snapshots_table}"
diff --git a/sqlmesh/migrations/v0099_add_last_altered_to_intervals.py b/sqlmesh/migrations/v0099_add_last_altered_to_intervals.py
index 1a119a338d..b80ed35a35 100644
--- a/sqlmesh/migrations/v0099_add_last_altered_to_intervals.py
+++ b/sqlmesh/migrations/v0099_add_last_altered_to_intervals.py
@@ -3,9 +3,7 @@
 from sqlglot import exp
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
-    engine_adapter = state_sync.engine_adapter
-    schema = state_sync.schema
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     intervals_table = "_intervals"
     if schema:
         intervals_table = f"{schema}.{intervals_table}"
@@ -23,5 +21,5 @@ def migrate_schemas(state_sync, **kwargs):  # type: ignore
     engine_adapter.execute(alter_table_exp)
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass
diff --git a/sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py b/sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py
index fa23935da0..9ff64c5e57 100644
--- a/sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py
+++ b/sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py
@@ -1,9 +1,9 @@
 """Add grants and grants_target_layer to incremental model metadata hash."""
 
 
-def migrate_schemas(state_sync, **kwargs):  # type: ignore
+def migrate_schemas(engine_adapter, schema, **kwargs):  # type: ignore
     pass
 
 
-def migrate_rows(state_sync, **kwargs):  # type: ignore
+def migrate_rows(engine_adapter, schema, **kwargs):  # type: ignore
     pass

From 6fb471f2eff08796f349755b47597d822d8761af Mon Sep 17 00:00:00 2001
From: Trey Spiller <1831878+treysp@users.noreply.github.com>
Date: Wed, 8 Oct 2025 13:43:30 -0500
Subject: [PATCH 067/173] Feat: add dbt builtin global try_or_compiler_error
 (#5504)

---
 sqlmesh/dbt/builtin.py           | 17 +++++++++++++++
 tests/dbt/test_transformation.py | 23 +++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/sqlmesh/dbt/builtin.py b/sqlmesh/dbt/builtin.py
index b8180bc011..145e29a96c 100644
--- a/sqlmesh/dbt/builtin.py
+++ b/sqlmesh/dbt/builtin.py
@@ -50,6 +50,22 @@ def warn(self, msg: str) -> str:
         return ""
 
 
+def try_or_compiler_error(
+    message_if_exception: str, func: t.Callable, *args: t.Any, **kwargs: t.Any
+) -> t.Any:
+    try:
+        return func(*args, **kwargs)
+    except Exception:
+        if DBT_VERSION >= (1, 4, 0):
+            from dbt.exceptions import CompilationError
+
+            raise CompilationError(message_if_exception)
+        else:
+            from dbt.exceptions import CompilationException  # type: ignore
+
+            raise CompilationException(message_if_exception)
+
+
 class Api:
     def __init__(self, dialect: t.Optional[str]) -> None:
         if dialect:
@@ -411,6 +427,7 @@ def debug() -> str:
     "sqlmesh_incremental": True,
     "tojson": to_json,
     "toyaml": to_yaml,
+    "try_or_compiler_error": try_or_compiler_error,
     "zip": do_zip,
     "zip_strict": lambda *args: list(zip(*args)),
 }
diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py
index e519713d26..304ac57731 100644
--- a/tests/dbt/test_transformation.py
+++ b/tests/dbt/test_transformation.py
@@ -1592,6 +1592,29 @@ def test_exceptions(sushi_test_project: Project):
         context.render('{{ exceptions.raise_compiler_error("Error") }}')
 
 
+@pytest.mark.xdist_group("dbt_manifest")
+def test_try_or_compiler_error(sushi_test_project: Project):
+    context = sushi_test_project.context
+
+    result = context.render(
+        '{{ try_or_compiler_error("Error message", modules.datetime.datetime.strptime, "2023-01-15", "%Y-%m-%d") }}'
+    )
+    assert "2023-01-15" in result
+
+    with pytest.raises(CompilationError, match="Invalid date format"):
+        context.render(
+            '{{ try_or_compiler_error("Invalid date format", modules.datetime.datetime.strptime, "invalid", "%Y-%m-%d") }}'
+        )
+
+    # built-in macro calling try_or_compiler_error works
+    result = context.render(
+        '{{ dbt.dates_in_range("2023-01-01", "2023-01-03", "%Y-%m-%d", "%Y-%m-%d") }}'
+    )
+    assert "2023-01-01" in result
+    assert "2023-01-02" in result
+    assert "2023-01-03" in result
+
+
 @pytest.mark.xdist_group("dbt_manifest")
 def test_modules(sushi_test_project: Project):
     context = sushi_test_project.context

From e1510cec7776985b8549bfa6fbd82b2fba66fbb3 Mon Sep 17 00:00:00 2001
From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com>
Date: Wed, 8 Oct 2025 11:45:26 -0700
Subject: [PATCH 068/173] chore: move janitor functions to janitor.py (#5510)

---
 sqlmesh/core/context.py                   |   3 +-
 sqlmesh/core/janitor.py                   | 181 +++++++++++++++
 sqlmesh/core/state_sync/__init__.py       |   1 -
 sqlmesh/core/state_sync/common.py         | 169 +-------------
 tests/core/state_sync/test_state_sync.py  | 252 +-------------------
 tests/core/test_janitor.py                | 282 +++++++++++++++++++++++
 6 files changed, 466 insertions(+), 422 deletions(-)
 create mode 100644 sqlmesh/core/janitor.py
 create mode 100644 tests/core/test_janitor.py

diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py
index bd8647f811..d118116f7f 100644
--- a/sqlmesh/core/context.py
+++ b/sqlmesh/core/context.py
@@ -107,9 +107,8 @@
     CachingStateSync,
     StateReader,
     StateSync,
-    cleanup_expired_views,
 )
-from sqlmesh.core.state_sync.common import delete_expired_snapshots
+from sqlmesh.core.janitor import cleanup_expired_views, delete_expired_snapshots
 from sqlmesh.core.table_diff import TableDiff
 from sqlmesh.core.test import (
     ModelTextTestResult,
diff --git a/sqlmesh/core/janitor.py b/sqlmesh/core/janitor.py
new file mode 100644
index 0000000000..e050d6ef6c
--- /dev/null
+++ b/sqlmesh/core/janitor.py
@@ -0,0 +1,181 @@
+from __future__ import annotations
+
+import typing as t
+
+from sqlglot import exp
+
+from sqlmesh.core.engine_adapter import EngineAdapter
+from sqlmesh.core.console import Console
+from sqlmesh.core.dialect import schema_
+from sqlmesh.core.environment import Environment
+from sqlmesh.core.snapshot import SnapshotEvaluator
+from sqlmesh.core.state_sync import StateSync
+from sqlmesh.core.state_sync.common import (
+    logger,
+    iter_expired_snapshot_batches,
+    RowBoundary,
+    ExpiredBatchRange,
+)
+from sqlmesh.utils.errors import SQLMeshError
+
+
+def cleanup_expired_views(
+    default_adapter: EngineAdapter,
+    engine_adapters: t.Dict[str, EngineAdapter],
+    environments: t.List[Environment],
+    warn_on_delete_failure: bool = False,
+    console: t.Optional[Console] = None,
+) -> None:
+    expired_schema_or_catalog_environments = [
+        environment
+        for environment in environments
+        if environment.suffix_target.is_schema or environment.suffix_target.is_catalog
+    ]
+    expired_table_environments = [
+        environment for environment in environments if environment.suffix_target.is_table
+    ]
+
+    # We have to use the corresponding adapter if the virtual layer is gateway managed
+    def get_adapter(gateway_managed: bool, gateway: t.Optional[str] = None) -> EngineAdapter:
+        if gateway_managed and gateway:
+            return engine_adapters.get(gateway, default_adapter)
+        return default_adapter
+
+    catalogs_to_drop: t.Set[t.Tuple[EngineAdapter, str]] = set()
+    schemas_to_drop: t.Set[t.Tuple[EngineAdapter, exp.Table]] = set()
+
+    # Collect schemas and catalogs to drop
+    for engine_adapter, expired_catalog, expired_schema, suffix_target in {
+        (
+            (engine_adapter := get_adapter(environment.gateway_managed, snapshot.model_gateway)),
+            snapshot.qualified_view_name.catalog_for_environment(
+                environment.naming_info, dialect=engine_adapter.dialect
+            ),
+            snapshot.qualified_view_name.schema_for_environment(
+                environment.naming_info, dialect=engine_adapter.dialect
+            ),
+            environment.suffix_target,
+        )
+        for environment in expired_schema_or_catalog_environments
+        for snapshot in environment.snapshots
+        if snapshot.is_model and not snapshot.is_symbolic
+    }:
+        if suffix_target.is_catalog:
+            if expired_catalog:
+                catalogs_to_drop.add((engine_adapter, expired_catalog))
+        else:
+            schema = schema_(expired_schema, expired_catalog)
+            schemas_to_drop.add((engine_adapter, schema))
+
+    # Drop the views for the expired environments
+    for engine_adapter, expired_view in {
+        (
+            (engine_adapter := get_adapter(environment.gateway_managed, snapshot.model_gateway)),
+            snapshot.qualified_view_name.for_environment(
+                environment.naming_info, dialect=engine_adapter.dialect
+            ),
+        )
+        for environment in expired_table_environments
+        for snapshot in environment.snapshots
+        if snapshot.is_model and not snapshot.is_symbolic
+    }:
+        try:
+            engine_adapter.drop_view(expired_view, ignore_if_not_exists=True)
+            if console:
+                console.update_cleanup_progress(expired_view)
+        except Exception as e:
+            message = f"Failed to drop the expired environment view '{expired_view}': {e}"
+            if warn_on_delete_failure:
+                logger.warning(message)
+            else:
+                raise SQLMeshError(message) from e
+
+    # Drop the schemas for the expired environments
+    for engine_adapter, schema in schemas_to_drop:
+        try:
+            engine_adapter.drop_schema(
+                schema,
+                ignore_if_not_exists=True,
+                cascade=True,
+            )
+            if console:
+                console.update_cleanup_progress(schema.sql(dialect=engine_adapter.dialect))
+        except Exception as e:
+            message = f"Failed to drop the expired environment schema '{schema}': {e}"
+            if warn_on_delete_failure:
+                logger.warning(message)
+            else:
+                raise SQLMeshError(message) from e
+
+    # Drop any catalogs that were associated with a snapshot where the engine adapter supports dropping catalogs
+    # catalogs_to_drop is only populated when environment_suffix_target is set to 'catalog'
+    for engine_adapter, catalog in catalogs_to_drop:
+        if engine_adapter.SUPPORTS_CREATE_DROP_CATALOG:
+            try:
+                engine_adapter.drop_catalog(catalog)
+                if console:
+                    console.update_cleanup_progress(catalog)
+            except Exception as e:
+                message = f"Failed to drop the expired environment catalog '{catalog}': {e}"
+                if warn_on_delete_failure:
+                    logger.warning(message)
+                else:
+                    raise SQLMeshError(message) from e
+
+
+def delete_expired_snapshots(
+    state_sync: StateSync,
+    snapshot_evaluator: SnapshotEvaluator,
+    *,
+    current_ts: int,
+    ignore_ttl: bool = False,
+    batch_size: t.Optional[int] = None,
+    console: t.Optional[Console] = None,
+) -> None:
+    """Delete all expired snapshots in batches.
+
+    This helper function encapsulates the logic for deleting expired snapshots in batches,
+    eliminating code duplication across different use cases.
+
+    Args:
+        state_sync: StateSync instance to query and delete expired snapshots from.
+        snapshot_evaluator: SnapshotEvaluator instance to clean up tables associated with snapshots.
+        current_ts: Timestamp used to evaluate expiration.
+        ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced).
+        batch_size: Maximum number of snapshots to fetch per batch.
+        console: Optional console for reporting progress.
+
+    Returns:
+        None; the total number of deleted expired snapshots is logged.
+    """
+    num_expired_snapshots = 0
+    for batch in iter_expired_snapshot_batches(
+        state_reader=state_sync,
+        current_ts=current_ts,
+        ignore_ttl=ignore_ttl,
+        batch_size=batch_size,
+    ):
+        end_info = (
+            f"updated_ts={batch.batch_range.end.updated_ts}"
+            if isinstance(batch.batch_range.end, RowBoundary)
+            else f"limit={batch.batch_range.end.batch_size}"
+        )
+        logger.info(
+            "Processing batch of size %s with end %s",
+            len(batch.expired_snapshot_ids),
+            end_info,
+        )
+        snapshot_evaluator.cleanup(
+            target_snapshots=batch.cleanup_tasks,
+            on_complete=console.update_cleanup_progress if console else None,
+        )
+        state_sync.delete_expired_snapshots(
+            batch_range=ExpiredBatchRange(
+                start=RowBoundary.lowest_boundary(),
+                end=batch.batch_range.end,
+            ),
+            ignore_ttl=ignore_ttl,
+        )
+        logger.info("Cleaned up expired snapshots batch")
+        num_expired_snapshots += len(batch.expired_snapshot_ids)
+    logger.info("Cleaned up %s expired snapshots", num_expired_snapshots)
diff --git a/sqlmesh/core/state_sync/__init__.py b/sqlmesh/core/state_sync/__init__.py
index 1585d6211f..12ea77ac8f 100644
--- a/sqlmesh/core/state_sync/__init__.py
+++ b/sqlmesh/core/state_sync/__init__.py
@@ -20,5 +20,4 @@
     Versions as Versions,
 )
 from sqlmesh.core.state_sync.cache import CachingStateSync as CachingStateSync
-from sqlmesh.core.state_sync.common import cleanup_expired_views as cleanup_expired_views
 from sqlmesh.core.state_sync.db import EngineAdapterStateSync as EngineAdapterStateSync
diff --git a/sqlmesh/core/state_sync/common.py b/sqlmesh/core/state_sync/common.py
index 3fdd0bc015..056565b060 100644
--- a/sqlmesh/core/state_sync/common.py
+++ b/sqlmesh/core/state_sync/common.py
@@ -11,132 +11,23 @@
 from pydantic_core.core_schema import ValidationInfo
 from sqlglot import exp
 
-from sqlmesh.core.console import Console
-from sqlmesh.core.dialect import schema_
 from sqlmesh.utils.pydantic import PydanticModel, field_validator
 from sqlmesh.core.environment import Environment, EnvironmentStatements, EnvironmentNamingInfo
-from sqlmesh.utils.errors import SQLMeshError
 from sqlmesh.core.snapshot import (
     Snapshot,
-    SnapshotEvaluator,
     SnapshotId,
     SnapshotTableCleanupTask,
     SnapshotTableInfo,
 )
 
 if t.TYPE_CHECKING:
-    from sqlmesh.core.engine_adapter.base import EngineAdapter
-    from sqlmesh.core.state_sync.base import Versions, StateReader, StateSync
+    from sqlmesh.core.state_sync.base import Versions, StateReader
 
 logger = logging.getLogger(__name__)
 
 EXPIRED_SNAPSHOT_DEFAULT_BATCH_SIZE = 200
 
 
-def cleanup_expired_views(
-    default_adapter: EngineAdapter,
-    engine_adapters: t.Dict[str, EngineAdapter],
-    environments: t.List[Environment],
-    warn_on_delete_failure: bool = False,
-    console: t.Optional[Console] = None,
-) -> None:
-    expired_schema_or_catalog_environments = [
-        environment
-        for environment in environments
-        if environment.suffix_target.is_schema or environment.suffix_target.is_catalog
-    ]
-    expired_table_environments = [
-        environment for environment in environments if environment.suffix_target.is_table
-    ]
-
-    # We have to use the corresponding adapter if the virtual layer is gateway managed
-    def get_adapter(gateway_managed: bool, gateway: t.Optional[str] = None) -> EngineAdapter:
-        if gateway_managed and gateway:
-            return engine_adapters.get(gateway, default_adapter)
-        return default_adapter
-
-    catalogs_to_drop: t.Set[t.Tuple[EngineAdapter, str]] = set()
-    schemas_to_drop: t.Set[t.Tuple[EngineAdapter, exp.Table]] = set()
-
-    # Collect schemas and catalogs to drop
-    for engine_adapter, expired_catalog, expired_schema, suffix_target in {
-        (
-            (engine_adapter := get_adapter(environment.gateway_managed, snapshot.model_gateway)),
-            snapshot.qualified_view_name.catalog_for_environment(
-                environment.naming_info, dialect=engine_adapter.dialect
-            ),
-            snapshot.qualified_view_name.schema_for_environment(
-                environment.naming_info, dialect=engine_adapter.dialect
-            ),
-            environment.suffix_target,
-        )
-        for environment in expired_schema_or_catalog_environments
-        for snapshot in environment.snapshots
-        if snapshot.is_model and not snapshot.is_symbolic
-    }:
-        if suffix_target.is_catalog:
-            if expired_catalog:
-                catalogs_to_drop.add((engine_adapter, expired_catalog))
-        else:
-            schema = schema_(expired_schema, expired_catalog)
-            schemas_to_drop.add((engine_adapter, schema))
-
-    # Drop the views for the expired environments
-    for engine_adapter, expired_view in {
-        (
-            (engine_adapter := get_adapter(environment.gateway_managed, snapshot.model_gateway)),
-            snapshot.qualified_view_name.for_environment(
-                environment.naming_info, dialect=engine_adapter.dialect
-            ),
-        )
-        for environment in expired_table_environments
-        for snapshot in environment.snapshots
-        if snapshot.is_model and not snapshot.is_symbolic
-    }:
-        try:
-            engine_adapter.drop_view(expired_view, ignore_if_not_exists=True)
-            if console:
-                console.update_cleanup_progress(expired_view)
-        except Exception as e:
-            message = f"Failed to drop the expired environment view '{expired_view}': {e}"
-            if warn_on_delete_failure:
-                logger.warning(message)
-            else:
-                raise SQLMeshError(message) from e
-
-    # Drop the schemas for the expired environments
-    for engine_adapter, schema in schemas_to_drop:
-        try:
-            engine_adapter.drop_schema(
-                schema,
-                ignore_if_not_exists=True,
-                cascade=True,
-            )
-            if console:
-                console.update_cleanup_progress(schema.sql(dialect=engine_adapter.dialect))
-        except Exception as e:
-            message = f"Failed to drop the expired environment schema '{schema}': {e}"
-            if warn_on_delete_failure:
-                logger.warning(message)
-            else:
-
raise SQLMeshError(message) from e - - # Drop any catalogs that were associated with a snapshot where the engine adapter supports dropping catalogs - # catalogs_to_drop is only populated when environment_suffix_target is set to 'catalog' - for engine_adapter, catalog in catalogs_to_drop: - if engine_adapter.SUPPORTS_CREATE_DROP_CATALOG: - try: - engine_adapter.drop_catalog(catalog) - if console: - console.update_cleanup_progress(catalog) - except Exception as e: - message = f"Failed to drop the expired environment catalog '{catalog}': {e}" - if warn_on_delete_failure: - logger.warning(message) - else: - raise SQLMeshError(message) from e - - def transactional() -> t.Callable[[t.Callable], t.Callable]: def decorator(func: t.Callable) -> t.Callable: @wraps(func) @@ -429,61 +320,3 @@ def iter_expired_snapshot_batches( start=batch.batch_range.end, end=LimitBoundary(batch_size=batch_size), ) - - -def delete_expired_snapshots( - state_sync: StateSync, - snapshot_evaluator: SnapshotEvaluator, - *, - current_ts: int, - ignore_ttl: bool = False, - batch_size: t.Optional[int] = None, - console: t.Optional[Console] = None, -) -> None: - """Delete all expired snapshots in batches. - - This helper function encapsulates the logic for deleting expired snapshots in batches, - eliminating code duplication across different use cases. - - Args: - state_sync: StateSync instance to query and delete expired snapshots from. - snapshot_evaluator: SnapshotEvaluator instance to clean up tables associated with snapshots. - current_ts: Timestamp used to evaluate expiration. - ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced). - batch_size: Maximum number of snapshots to fetch per batch. - console: Optional console for reporting progress. - - Returns: - The total number of deleted expired snapshots. 
- """ - num_expired_snapshots = 0 - for batch in iter_expired_snapshot_batches( - state_reader=state_sync, - current_ts=current_ts, - ignore_ttl=ignore_ttl, - batch_size=batch_size, - ): - end_info = ( - f"updated_ts={batch.batch_range.end.updated_ts}" - if isinstance(batch.batch_range.end, RowBoundary) - else f"limit={batch.batch_range.end.batch_size}" - ) - logger.info( - "Processing batch of size %s with end %s", - len(batch.expired_snapshot_ids), - end_info, - ) - snapshot_evaluator.cleanup( - target_snapshots=batch.cleanup_tasks, - on_complete=console.update_cleanup_progress if console else None, - ) - state_sync.delete_expired_snapshots( - batch_range=ExpiredBatchRange( - start=RowBoundary.lowest_boundary(), - end=batch.batch_range.end, - ), - ignore_ttl=ignore_ttl, - ) - logger.info("Cleaned up expired snapshots batch") - num_expired_snapshots += len(batch.expired_snapshot_ids) - logger.info("Cleaned up %s expired snapshots", num_expired_snapshots) diff --git a/tests/core/state_sync/test_state_sync.py b/tests/core/state_sync/test_state_sync.py index 199ca43ee9..bd01dfc652 100644 --- a/tests/core/state_sync/test_state_sync.py +++ b/tests/core/state_sync/test_state_sync.py @@ -13,19 +13,17 @@ from sqlmesh.core import constants as c from sqlmesh.core.config import EnvironmentSuffixTarget -from sqlmesh.core.dialect import parse_one, schema_ +from sqlmesh.core.dialect import parse_one from sqlmesh.core.engine_adapter import create_engine_adapter from sqlmesh.core.environment import Environment, EnvironmentStatements from sqlmesh.core.model import ( FullKind, IncrementalByTimeRangeKind, - ModelKindName, Seed, SeedKind, SeedModel, SqlModel, ) -from sqlmesh.core.model.definition import ExternalModel from sqlmesh.core.snapshot import ( Snapshot, SnapshotChangeCategory, @@ -38,7 +36,6 @@ from sqlmesh.core.state_sync import ( CachingStateSync, EngineAdapterStateSync, - cleanup_expired_views, ) from sqlmesh.core.state_sync.base import ( SCHEMA_VERSION, @@ -1524,154 +1521,6 @@ def test_expired_batch_range_where_filter_with_limit(): ) -def test_delete_expired_snapshots_common_function_batching( - state_sync: EngineAdapterStateSync, make_snapshot: t.Callable, mocker: MockerFixture -): - """Test that the common delete_expired_snapshots function properly pages through batches and deletes them.""" - from sqlmesh.core.state_sync.common import delete_expired_snapshots - from sqlmesh.core.state_sync.common import ExpiredBatchRange, RowBoundary, LimitBoundary - from unittest.mock import MagicMock - - now_ts = now_timestamp() - - # Create 5 expired snapshots with different timestamps - snapshots = [] - for idx in range(5): - snapshot = make_snapshot( - SqlModel( - name=f"model_{idx}", - query=parse_one("select 1 as a, ds"), - ), - ) - snapshot.ttl = "in 10 seconds" - snapshot.categorize_as(SnapshotChangeCategory.BREAKING) - snapshot.updated_ts = now_ts - (20000 + idx * 1000) - snapshots.append(snapshot) - - state_sync.push_snapshots(snapshots) - - # Spy on get_expired_snapshots and delete_expired_snapshots methods - get_expired_spy = mocker.spy(state_sync, "get_expired_snapshots") - delete_expired_spy = mocker.spy(state_sync, "delete_expired_snapshots") - - # Mock snapshot evaluator - mock_evaluator = MagicMock() - mock_evaluator.cleanup = MagicMock() - - # Run delete_expired_snapshots with batch_size=2 - delete_expired_snapshots( - state_sync, - mock_evaluator, - current_ts=now_ts, - batch_size=2, - ) - - # Verify get_expired_snapshots was called the correct number of times: - # - 3 batches (2+2+1): each 
batch triggers 2 calls (one from iter_expired_snapshot_batches, one from delete_expired_snapshots) - # - Plus 1 final call that returns empty to exit the loop - # Total: 3 * 2 + 1 = 7 calls - assert get_expired_spy.call_count == 7 - - # Verify the progression of batch_range calls from the iter_expired_snapshot_batches loop - # (calls at indices 0, 2, 4, 6 are from iter_expired_snapshot_batches) - # (calls at indices 1, 3, 5 are from delete_expired_snapshots in facade.py) - calls = get_expired_spy.call_args_list - - # First call from iterator should have a batch_range starting from the beginning - first_call_kwargs = calls[0][1] - assert "batch_range" in first_call_kwargs - first_range = first_call_kwargs["batch_range"] - assert isinstance(first_range, ExpiredBatchRange) - assert isinstance(first_range.start, RowBoundary) - assert isinstance(first_range.end, LimitBoundary) - assert first_range.end.batch_size == 2 - assert first_range.start.updated_ts == 0 - assert first_range.start.name == "" - assert first_range.start.identifier == "" - - # Third call (second batch from iterator) should have a batch_range from the first batch's range - third_call_kwargs = calls[2][1] - assert "batch_range" in third_call_kwargs - second_range = third_call_kwargs["batch_range"] - assert isinstance(second_range, ExpiredBatchRange) - assert isinstance(second_range.start, RowBoundary) - assert isinstance(second_range.end, LimitBoundary) - assert second_range.end.batch_size == 2 - # Should have progressed from the first batch - assert second_range.start.updated_ts > 0 - assert second_range.start.name == '"model_3"' - - # Fifth call (third batch from iterator) should have a batch_range from the second batch's range - fifth_call_kwargs = calls[4][1] - assert "batch_range" in fifth_call_kwargs - third_range = fifth_call_kwargs["batch_range"] - assert isinstance(third_range, ExpiredBatchRange) - assert isinstance(third_range.start, RowBoundary) - assert isinstance(third_range.end, LimitBoundary) - assert third_range.end.batch_size == 2 - # Should have progressed from the second batch - assert third_range.start.updated_ts >= second_range.start.updated_ts - assert third_range.start.name == '"model_1"' - - # Seventh call (final call from iterator) should have a batch_range from the third batch's range - seventh_call_kwargs = calls[6][1] - assert "batch_range" in seventh_call_kwargs - fourth_range = seventh_call_kwargs["batch_range"] - assert isinstance(fourth_range, ExpiredBatchRange) - assert isinstance(fourth_range.start, RowBoundary) - assert isinstance(fourth_range.end, LimitBoundary) - assert fourth_range.end.batch_size == 2 - # Should have progressed from the third batch - assert fourth_range.start.updated_ts >= third_range.start.updated_ts - assert fourth_range.start.name == '"model_0"' - - # Verify delete_expired_snapshots was called 3 times (once per batch) - assert delete_expired_spy.call_count == 3 - - # Verify each delete call used a batch_range - delete_calls = delete_expired_spy.call_args_list - - # First call should have a batch_range matching the first batch - first_delete_kwargs = delete_calls[0][1] - assert "batch_range" in first_delete_kwargs - first_delete_range = first_delete_kwargs["batch_range"] - assert isinstance(first_delete_range, ExpiredBatchRange) - assert isinstance(first_delete_range.start, RowBoundary) - assert first_delete_range.start.updated_ts == 0 - assert isinstance(first_delete_range.end, RowBoundary) - assert first_delete_range.end.updated_ts == second_range.start.updated_ts - 
assert first_delete_range.end.name == second_range.start.name - assert first_delete_range.end.identifier == second_range.start.identifier - - second_delete_kwargs = delete_calls[1][1] - assert "batch_range" in second_delete_kwargs - second_delete_range = second_delete_kwargs["batch_range"] - assert isinstance(second_delete_range, ExpiredBatchRange) - assert isinstance(second_delete_range.start, RowBoundary) - assert second_delete_range.start.updated_ts == 0 - assert isinstance(second_delete_range.end, RowBoundary) - assert second_delete_range.end.updated_ts == third_range.start.updated_ts - assert second_delete_range.end.name == third_range.start.name - assert second_delete_range.end.identifier == third_range.start.identifier - - third_delete_kwargs = delete_calls[2][1] - assert "batch_range" in third_delete_kwargs - third_delete_range = third_delete_kwargs["batch_range"] - assert isinstance(third_delete_range, ExpiredBatchRange) - assert isinstance(third_delete_range.start, RowBoundary) - assert third_delete_range.start.updated_ts == 0 - assert isinstance(third_delete_range.end, RowBoundary) - assert third_delete_range.end.updated_ts == fourth_range.start.updated_ts - assert third_delete_range.end.name == fourth_range.start.name - assert third_delete_range.end.identifier == fourth_range.start.identifier - # Verify the cleanup method was called for each batch that had cleanup tasks - assert mock_evaluator.cleanup.call_count >= 1 - - # Verify all snapshots were deleted in the end - remaining = state_sync.get_snapshots(snapshots) - assert len(remaining) == 0 - - def test_delete_expired_snapshots_seed( state_sync: EngineAdapterStateSync, make_snapshot: t.Callable ): @@ -3089,105 +2938,6 @@ def test_cache(state_sync, make_snapshot, mocker): mock.assert_called() -def test_cleanup_expired_views( - mocker: MockerFixture, state_sync: EngineAdapterStateSync, make_snapshot: t.Callable -): - adapter = mocker.MagicMock() - adapter.dialect = None - snapshot_a = make_snapshot(SqlModel(name="catalog.schema.a", query=parse_one("select 1, ds"))) - snapshot_a.categorize_as(SnapshotChangeCategory.BREAKING) - snapshot_b = make_snapshot(SqlModel(name="catalog.schema.b", query=parse_one("select 1, ds"))) - snapshot_b.categorize_as(SnapshotChangeCategory.BREAKING) - # Make sure that we don't drop schemas from external models - snapshot_external_model = make_snapshot( - ExternalModel(name="catalog.external_schema.external_table", kind=ModelKindName.EXTERNAL) - ) - snapshot_external_model.categorize_as(SnapshotChangeCategory.BREAKING) - schema_environment = Environment( - name="test_environment", - suffix_target=EnvironmentSuffixTarget.SCHEMA, - snapshots=[ - snapshot_a.table_info, - snapshot_b.table_info, - snapshot_external_model.table_info, - ], - start_at="2022-01-01", - end_at="2022-01-01", - plan_id="test_plan_id", - previous_plan_id="test_plan_id", - catalog_name_override="catalog_override", - ) - snapshot_c = make_snapshot(SqlModel(name="catalog.schema.c", query=parse_one("select 1, ds"))) - snapshot_c.categorize_as(SnapshotChangeCategory.BREAKING) - snapshot_d = make_snapshot(SqlModel(name="catalog.schema.d", query=parse_one("select 1, ds"))) - snapshot_d.categorize_as(SnapshotChangeCategory.BREAKING) - table_environment = Environment( - name="test_environment", - suffix_target=EnvironmentSuffixTarget.TABLE, - snapshots=[ - snapshot_c.table_info, - snapshot_d.table_info, - snapshot_external_model.table_info, - ], - start_at="2022-01-01", - end_at="2022-01-01", - plan_id="test_plan_id", - 
previous_plan_id="test_plan_id", - catalog_name_override="catalog_override", - ) - cleanup_expired_views(adapter, {}, [schema_environment, table_environment]) - assert adapter.drop_schema.called - assert adapter.drop_view.called - assert adapter.drop_schema.call_args_list == [ - call( - schema_("schema__test_environment", "catalog_override"), - ignore_if_not_exists=True, - cascade=True, - ) - ] - assert sorted(adapter.drop_view.call_args_list) == [ - call("catalog_override.schema.c__test_environment", ignore_if_not_exists=True), - call("catalog_override.schema.d__test_environment", ignore_if_not_exists=True), - ] - - -@pytest.mark.parametrize( - "suffix_target", [EnvironmentSuffixTarget.SCHEMA, EnvironmentSuffixTarget.TABLE] -) -def test_cleanup_expired_environment_schema_warn_on_delete_failure( - mocker: MockerFixture, make_snapshot: t.Callable, suffix_target: EnvironmentSuffixTarget -): - adapter = mocker.MagicMock() - adapter.dialect = None - adapter.drop_schema.side_effect = Exception("Failed to drop the schema") - adapter.drop_view.side_effect = Exception("Failed to drop the view") - - snapshot = make_snapshot( - SqlModel(name="test_catalog.test_schema.test_model", query=parse_one("select 1, ds")) - ) - snapshot.categorize_as(SnapshotChangeCategory.BREAKING) - schema_environment = Environment( - name="test_environment", - suffix_target=suffix_target, - snapshots=[snapshot.table_info], - start_at="2022-01-01", - end_at="2022-01-01", - plan_id="test_plan_id", - previous_plan_id="test_plan_id", - catalog_name_override="catalog_override", - ) - - with pytest.raises(SQLMeshError, match="Failed to drop the expired environment .*"): - cleanup_expired_views(adapter, {}, [schema_environment], warn_on_delete_failure=False) - - cleanup_expired_views(adapter, {}, [schema_environment], warn_on_delete_failure=True) - - if suffix_target == EnvironmentSuffixTarget.SCHEMA: - assert adapter.drop_schema.called - else: - assert adapter.drop_view.called - - def test_max_interval_end_per_model( state_sync: EngineAdapterStateSync, make_snapshot: t.Callable ) -> None: diff --git a/tests/core/test_janitor.py b/tests/core/test_janitor.py new file mode 100644 index 0000000000..e5e209f2cc --- /dev/null +++ b/tests/core/test_janitor.py @@ -0,0 +1,282 @@ +import typing as t +from unittest.mock import call + +import pytest +from pytest_mock.plugin import MockerFixture + +from sqlmesh.core.config import EnvironmentSuffixTarget +from sqlmesh.core import constants as c +from sqlmesh.core.dialect import parse_one, schema_ +from sqlmesh.core.engine_adapter import create_engine_adapter +from sqlmesh.core.environment import Environment +from sqlmesh.core.model import ( + ModelKindName, + SqlModel, +) +from sqlmesh.core.model.definition import ExternalModel +from sqlmesh.core.snapshot import ( + SnapshotChangeCategory, +) +from sqlmesh.core.state_sync import ( + EngineAdapterStateSync, +) +from sqlmesh.core.janitor import cleanup_expired_views, delete_expired_snapshots +from sqlmesh.utils.date import now_timestamp +from sqlmesh.utils.errors import SQLMeshError + +pytestmark = pytest.mark.slow + + +@pytest.fixture +def state_sync(duck_conn, tmp_path): + state_sync = EngineAdapterStateSync( + create_engine_adapter(lambda: duck_conn, "duckdb"), + schema=c.SQLMESH, + cache_dir=tmp_path / c.CACHE, + ) + state_sync.migrate() + return state_sync + + +def test_cleanup_expired_views(mocker: MockerFixture, make_snapshot: t.Callable): + adapter = mocker.MagicMock() + adapter.dialect = None + snapshot_a = 
make_snapshot(SqlModel(name="catalog.schema.a", query=parse_one("select 1, ds"))) + snapshot_a.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot_b = make_snapshot(SqlModel(name="catalog.schema.b", query=parse_one("select 1, ds"))) + snapshot_b.categorize_as(SnapshotChangeCategory.BREAKING) + # Make sure that we don't drop schemas from external models + snapshot_external_model = make_snapshot( + ExternalModel(name="catalog.external_schema.external_table", kind=ModelKindName.EXTERNAL) + ) + snapshot_external_model.categorize_as(SnapshotChangeCategory.BREAKING) + schema_environment = Environment( + name="test_environment", + suffix_target=EnvironmentSuffixTarget.SCHEMA, + snapshots=[ + snapshot_a.table_info, + snapshot_b.table_info, + snapshot_external_model.table_info, + ], + start_at="2022-01-01", + end_at="2022-01-01", + plan_id="test_plan_id", + previous_plan_id="test_plan_id", + catalog_name_override="catalog_override", + ) + snapshot_c = make_snapshot(SqlModel(name="catalog.schema.c", query=parse_one("select 1, ds"))) + snapshot_c.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot_d = make_snapshot(SqlModel(name="catalog.schema.d", query=parse_one("select 1, ds"))) + snapshot_d.categorize_as(SnapshotChangeCategory.BREAKING) + table_environment = Environment( + name="test_environment", + suffix_target=EnvironmentSuffixTarget.TABLE, + snapshots=[ + snapshot_c.table_info, + snapshot_d.table_info, + snapshot_external_model.table_info, + ], + start_at="2022-01-01", + end_at="2022-01-01", + plan_id="test_plan_id", + previous_plan_id="test_plan_id", + catalog_name_override="catalog_override", + ) + cleanup_expired_views(adapter, {}, [schema_environment, table_environment]) + assert adapter.drop_schema.called + assert adapter.drop_view.called + assert adapter.drop_schema.call_args_list == [ + call( + schema_("schema__test_environment", "catalog_override"), + ignore_if_not_exists=True, + cascade=True, + ) + ] + assert sorted(adapter.drop_view.call_args_list) == [ + call("catalog_override.schema.c__test_environment", ignore_if_not_exists=True), + call("catalog_override.schema.d__test_environment", ignore_if_not_exists=True), + ] + + +@pytest.mark.parametrize( + "suffix_target", [EnvironmentSuffixTarget.SCHEMA, EnvironmentSuffixTarget.TABLE] +) +def test_cleanup_expired_environment_schema_warn_on_delete_failure( + mocker: MockerFixture, make_snapshot: t.Callable, suffix_target: EnvironmentSuffixTarget +): + adapter = mocker.MagicMock() + adapter.dialect = None + adapter.drop_schema.side_effect = Exception("Failed to drop the schema") + adapter.drop_view.side_effect = Exception("Failed to drop the view") + + snapshot = make_snapshot( + SqlModel(name="test_catalog.test_schema.test_model", query=parse_one("select 1, ds")) + ) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + schema_environment = Environment( + name="test_environment", + suffix_target=suffix_target, + snapshots=[snapshot.table_info], + start_at="2022-01-01", + end_at="2022-01-01", + plan_id="test_plan_id", + previous_plan_id="test_plan_id", + catalog_name_override="catalog_override", + ) + + with pytest.raises(SQLMeshError, match="Failed to drop the expired environment .*"): + cleanup_expired_views(adapter, {}, [schema_environment], warn_on_delete_failure=False) + + cleanup_expired_views(adapter, {}, [schema_environment], warn_on_delete_failure=True) + + if suffix_target == EnvironmentSuffixTarget.SCHEMA: + assert adapter.drop_schema.called + else: + assert adapter.drop_view.called + + +def 
test_delete_expired_snapshots_common_function_batching( + state_sync: EngineAdapterStateSync, make_snapshot: t.Callable, mocker: MockerFixture +): + """Test that the common delete_expired_snapshots function properly pages through batches and deletes them.""" + from sqlmesh.core.state_sync.common import ExpiredBatchRange, RowBoundary, LimitBoundary + from unittest.mock import MagicMock + + now_ts = now_timestamp() + + # Create 5 expired snapshots with different timestamps + snapshots = [] + for idx in range(5): + snapshot = make_snapshot( + SqlModel( + name=f"model_{idx}", + query=parse_one("select 1 as a, ds"), + ), + ) + snapshot.ttl = "in 10 seconds" + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot.updated_ts = now_ts - (20000 + idx * 1000) + snapshots.append(snapshot) + + state_sync.push_snapshots(snapshots) + + # Spy on get_expired_snapshots and delete_expired_snapshots methods + get_expired_spy = mocker.spy(state_sync, "get_expired_snapshots") + delete_expired_spy = mocker.spy(state_sync, "delete_expired_snapshots") + + # Mock snapshot evaluator + mock_evaluator = MagicMock() + mock_evaluator.cleanup = MagicMock() + + # Run delete_expired_snapshots with batch_size=2 + delete_expired_snapshots( + state_sync, + mock_evaluator, + current_ts=now_ts, + batch_size=2, + ) + + # Verify get_expired_snapshots was called the correct number of times: + # - 3 batches (2+2+1): each batch triggers 2 calls (one from iter_expired_snapshot_batches, one from delete_expired_snapshots) + # - Plus 1 final call that returns empty to exit the loop + # Total: 3 * 2 + 1 = 7 calls + assert get_expired_spy.call_count == 7 + + # Verify the progression of batch_range calls from the iter_expired_snapshot_batches loop + # (calls at indices 0, 2, 4, 6 are from iter_expired_snapshot_batches) + # (calls at indices 1, 3, 5 are from delete_expired_snapshots in facade.py) + calls = get_expired_spy.call_args_list + + # First call from iterator should have a batch_range starting from the beginning + first_call_kwargs = calls[0][1] + assert "batch_range" in first_call_kwargs + first_range = first_call_kwargs["batch_range"] + assert isinstance(first_range, ExpiredBatchRange) + assert isinstance(first_range.start, RowBoundary) + assert isinstance(first_range.end, LimitBoundary) + assert first_range.end.batch_size == 2 + assert first_range.start.updated_ts == 0 + assert first_range.start.name == "" + assert first_range.start.identifier == "" + + # Third call (second batch from iterator) should have a batch_range from the first batch's range + third_call_kwargs = calls[2][1] + assert "batch_range" in third_call_kwargs + second_range = third_call_kwargs["batch_range"] + assert isinstance(second_range, ExpiredBatchRange) + assert isinstance(second_range.start, RowBoundary) + assert isinstance(second_range.end, LimitBoundary) + assert second_range.end.batch_size == 2 + # Should have progressed from the first batch + assert second_range.start.updated_ts > 0 + assert second_range.start.name == '"model_3"' + + # Fifth call (third batch from iterator) should have a batch_range from the second batch's range + fifth_call_kwargs = calls[4][1] + assert "batch_range" in fifth_call_kwargs + third_range = fifth_call_kwargs["batch_range"] + assert isinstance(third_range, ExpiredBatchRange) + assert isinstance(third_range.start, RowBoundary) + assert isinstance(third_range.end, LimitBoundary) + assert third_range.end.batch_size == 2 + # Should have progressed from the second batch + assert third_range.start.updated_ts >= 
second_range.start.updated_ts + assert third_range.start.name == '"model_1"' + + # Seventh call (final call from iterator) should have a batch_range from the third batch's range + seventh_call_kwargs = calls[6][1] + assert "batch_range" in seventh_call_kwargs + fourth_range = seventh_call_kwargs["batch_range"] + assert isinstance(fourth_range, ExpiredBatchRange) + assert isinstance(fourth_range.start, RowBoundary) + assert isinstance(fourth_range.end, LimitBoundary) + assert fourth_range.end.batch_size == 2 + # Should have progressed from the third batch + assert fourth_range.start.updated_ts >= third_range.start.updated_ts + assert fourth_range.start.name == '"model_0"' + + # Verify delete_expired_snapshots was called 3 times (once per batch) + assert delete_expired_spy.call_count == 3 + + # Verify each delete call used a batch_range + delete_calls = delete_expired_spy.call_args_list + + # First call should have a batch_range matching the first batch + first_delete_kwargs = delete_calls[0][1] + assert "batch_range" in first_delete_kwargs + first_delete_range = first_delete_kwargs["batch_range"] + assert isinstance(first_delete_range, ExpiredBatchRange) + assert isinstance(first_delete_range.start, RowBoundary) + assert first_delete_range.start.updated_ts == 0 + assert isinstance(first_delete_range.end, RowBoundary) + assert first_delete_range.end.updated_ts == second_range.start.updated_ts + assert first_delete_range.end.name == second_range.start.name + assert first_delete_range.end.identifier == second_range.start.identifier + + second_delete_kwargs = delete_calls[1][1] + assert "batch_range" in second_delete_kwargs + second_delete_range = second_delete_kwargs["batch_range"] + assert isinstance(second_delete_range, ExpiredBatchRange) + assert isinstance(second_delete_range.start, RowBoundary) + assert second_delete_range.start.updated_ts == 0 + assert isinstance(second_delete_range.end, RowBoundary) + assert second_delete_range.end.updated_ts == third_range.start.updated_ts + assert second_delete_range.end.name == third_range.start.name + assert second_delete_range.end.identifier == third_range.start.identifier + + third_delete_kwargs = delete_calls[2][1] + assert "batch_range" in third_delete_kwargs + third_delete_range = third_delete_kwargs["batch_range"] + assert isinstance(third_delete_range, ExpiredBatchRange) + assert isinstance(third_delete_range.start, RowBoundary) + assert third_delete_range.start.updated_ts == 0 + assert isinstance(third_delete_range.end, RowBoundary) + assert third_delete_range.end.updated_ts == fourth_range.start.updated_ts + assert third_delete_range.end.name == fourth_range.start.name + assert third_delete_range.end.identifier == fourth_range.start.identifier + # Verify the cleanup method was called for each batch that had cleanup tasks + assert mock_evaluator.cleanup.call_count >= 1 + + # Verify all snapshots were deleted in the end + remaining = state_sync.get_snapshots(snapshots) + assert len(remaining) == 0 From b866d33838d016fddf754f91ae26bfb7c536b7af Mon Sep 17 00:00:00 2001 From: Trey Spiller <1831878+treysp@users.noreply.github.com> Date: Wed, 8 Oct 2025 14:08:21 -0500 Subject: [PATCH 069/173] Fix: track shadowed jinja variable assignments correctly (#5503) --- sqlmesh/utils/jinja.py | 10 ++++++++- tests/dbt/test_manifest.py | 43 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/sqlmesh/utils/jinja.py b/sqlmesh/utils/jinja.py index 59e9f6dd2f..240b183391 100644 --- 
a/sqlmesh/utils/jinja.py +++ b/sqlmesh/utils/jinja.py @@ -133,6 +133,12 @@ def find_call_names(node: nodes.Node, vars_in_scope: t.Set[str]) -> t.Iterator[C vars_in_scope = vars_in_scope.copy() for child_node in node.iter_child_nodes(): if "target" in child_node.fields: + # For nodes with assignment targets (Assign, AssignBlock, For, Import), + # the target name could shadow a reference in the right hand side. + # So we need to process the RHS before adding the target to scope. + # For example: {% set model = model.path %} should track model.path. + yield from find_call_names(child_node, vars_in_scope) + target = getattr(child_node, "target") if isinstance(target, nodes.Name): vars_in_scope.add(target.name) @@ -149,7 +155,9 @@ def find_call_names(node: nodes.Node, vars_in_scope: t.Set[str]) -> t.Iterator[C name = call_name(child_node) if name[0][0] != "'" and name[0] not in vars_in_scope: yield (name, child_node) - yield from find_call_names(child_node, vars_in_scope) + + if "target" not in child_node.fields: + yield from find_call_names(child_node, vars_in_scope) def extract_call_names( diff --git a/tests/dbt/test_manifest.py b/tests/dbt/test_manifest.py index e2e7bc706c..2ecf8b8980 100644 --- a/tests/dbt/test_manifest.py +++ b/tests/dbt/test_manifest.py @@ -324,3 +324,46 @@ def test_macro_depenency_none_str(): # "None" macro shouldn't raise a KeyError _macro_references(helper._manifest, node) + + +@pytest.mark.xdist_group("dbt_manifest") +def test_macro_assignment_shadowing(create_empty_project): + project_name = "local" + project_path, models_path = create_empty_project(project_name=project_name) + + macros_path = project_path / "macros" + macros_path.mkdir() + + (macros_path / "model_path_macro.sql").write_text(""" +{% macro model_path_macro() %} + {% if execute %} + {% set model = model.path.split('/')[-1].replace('.sql', '') %} + SELECT '{{ model }}' as model_name + {% else %} + SELECT 'placeholder' as placeholder + {% endif %} +{% endmacro %} +""") + + (models_path / "model_using_path_macro.sql").write_text(""" +{{ model_path_macro() }} +""") + + context = DbtContext(project_path) + profile = Profile.load(context) + + helper = ManifestHelper( + project_path, + project_path, + project_name, + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + + macros = helper.macros(project_name) + assert "model_path_macro" in macros + assert "path" in macros["model_path_macro"].dependencies.model_attrs.attrs + + models = helper.models() + assert "model_using_path_macro" in models + assert "path" in models["model_using_path_macro"].dependencies.model_attrs.attrs From c8bee084ad5a9658f1f9ea186a40c1435eec6bc4 Mon Sep 17 00:00:00 2001 From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com> Date: Wed, 8 Oct 2025 22:36:27 +0300 Subject: [PATCH 070/173] Fix: Only keep refs and sources that exist to match dbt load time behaviour (#5509) --- sqlmesh/dbt/basemodel.py | 4 +++ tests/dbt/test_model.py | 56 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/sqlmesh/dbt/basemodel.py b/sqlmesh/dbt/basemodel.py index 0c719ebb88..32a76aba13 100644 --- a/sqlmesh/dbt/basemodel.py +++ b/sqlmesh/dbt/basemodel.py @@ -317,6 +317,10 @@ def sqlmesh_model_kwargs( dependencies = dependencies.union(custom_mat.dependencies) model_dialect = self.dialect(context) + + # Only keep refs and sources that exist in the context to match dbt behavior + dependencies.refs.intersection_update(context.refs) + 
dependencies.sources.intersection_update(context.sources) model_context = context.context_for_dependencies( dependencies.union(self.tests_ref_source_dependencies) ) diff --git a/tests/dbt/test_model.py b/tests/dbt/test_model.py index eb16a4b4b1..797d638858 100644 --- a/tests/dbt/test_model.py +++ b/tests/dbt/test_model.py @@ -18,6 +18,7 @@ from sqlmesh.dbt.test import TestConfig from sqlmesh.utils.yaml import YAML from sqlmesh.utils.date import to_ds +import typing as t pytestmark = pytest.mark.dbt @@ -1028,3 +1029,58 @@ def test_ephemeral_model_ignores_grants() -> None: assert sqlmesh_model.kind.is_embedded assert sqlmesh_model.grants is None # grants config is skipped for ephemeral / embedded models + + +def test_conditional_ref_in_unexecuted_branch(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='table', +) }} + +{% if true %} + WITH source AS ( + SELECT * + FROM {{ ref('simple_model_a') }} + ) +{% else %} + WITH source AS ( + SELECT * + FROM {{ ref('nonexistent_model') }} -- this doesn't exist but is in unexecuted branch + ) +{% endif %} + +SELECT * FROM source +""".strip() + + (models_dir / "conditional_ref_model.sql").write_text(test_model_content) + sushi_context = Context(paths=[str(temp_project)]) + + # the model should load successfully without raising MissingModelError + model = sushi_context.get_model("sushi.conditional_ref_model") + assert model is not None + + # Verify only the executed ref is in the dependencies + assert len(model.depends_on) == 1 + assert '"memory"."sushi"."simple_model_a"' in model.depends_on + + # Also the model can be rendered successfully with the executed ref + rendered = model.render_query() + assert rendered is not None + assert ( + rendered.sql() + == 'WITH "source" AS (SELECT "simple_model_a"."a" AS "a" FROM "memory"."sushi"."simple_model_a" AS "simple_model_a") SELECT "source"."a" AS "a" FROM "source" AS "source"' + ) + + # And run plan with this conditional model for good measure + plan = sushi_context.plan(select_models=["sushi.conditional_ref_model", "sushi.simple_model_a"]) + sushi_context.apply(plan) + upstream_ref = sushi_context.engine_adapter.fetchone("SELECT * FROM sushi.simple_model_a") + assert upstream_ref == (1,) + result = sushi_context.engine_adapter.fetchone("SELECT * FROM sushi.conditional_ref_model") + assert result == (1,) From 189208ecb41c99efca973f063a467713373f3116 Mon Sep 17 00:00:00 2001 From: David Dai Date: Wed, 8 Oct 2025 13:44:03 -0700 Subject: [PATCH 071/173] fix: add default (empty) tags to dbt builtin globals config (#5506) --- sqlmesh/dbt/builtin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlmesh/dbt/builtin.py b/sqlmesh/dbt/builtin.py index 145e29a96c..c1105a2981 100644 --- a/sqlmesh/dbt/builtin.py +++ b/sqlmesh/dbt/builtin.py @@ -482,7 +482,7 @@ def create_builtin_globals( if variables is not None: builtin_globals["var"] = Var(variables) - builtin_globals["config"] = Config(jinja_globals.pop("config", {})) + builtin_globals["config"] = Config(jinja_globals.pop("config", {"tags": []})) deployability_index = ( jinja_globals.get("deployability_index") or DeployabilityIndex.all_deployable() From 6919e692c486a3498f89bb553b538e3a20a1bfe7 Mon Sep 17 00:00:00 2001 From: Max Mykal Date: Wed, 8 Oct 2025 15:47:19 -0700 Subject: [PATCH 072/173] fix(web_common): add more generic type for cll 
components (#5505) --- pnpm-lock.yaml | 45 ++-- web/common/package.json | 3 - .../ColumnLevelLineageContext.ts | 91 +++---- .../LineageColumnLevel/FactoryColumn.tsx | 21 +- .../Lineage/LineageColumnLevel/help.ts | 78 +++--- .../useColumnLevelLineage.ts | 23 +- .../src/components/Lineage/LineageContext.ts | 62 ++++- .../src/components/Lineage/LineageLayout.tsx | 10 +- .../components/Lineage/LineageLayoutBase.tsx | 87 +++++-- .../Lineage/edge/FactoryEdgeWithGradient.tsx | 10 +- web/common/src/components/Lineage/help.ts | 61 +++-- .../components/Lineage/layout/dagreLayout.ts | 15 +- .../src/components/Lineage/layout/help.ts | 43 +++- .../components/Lineage/node/NodeHandle.tsx | 7 +- .../components/Lineage/node/NodeHandles.tsx | 16 +- .../src/components/Lineage/node/NodePort.tsx | 24 +- .../components/Lineage/node/base-handle.tsx | 1 - .../Lineage/stories/Lineage.stories.tsx | 228 +++++++++--------- .../Lineage/stories/ModelLineage.tsx | 171 +++++++------ .../Lineage/stories/ModelLineageContext.ts | 59 ++++- .../Lineage/stories/ModelNodeColumn.tsx | 11 +- web/common/src/components/Lineage/utils.ts | 54 +++-- web/common/src/index.ts | 2 + web/common/src/types.ts | 40 ++- 24 files changed, 740 insertions(+), 422 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2fec93a8f3..aeacb362d0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -435,9 +435,6 @@ importers: '@types/dagre': specifier: 0.7.53 version: 0.7.53 - '@types/lodash': - specifier: 4.17.20 - version: 4.17.20 '@types/node': specifier: 20.11.25 version: 20.11.25 @@ -495,9 +492,6 @@ importers: globals: specifier: 16.3.0 version: 16.3.0 - lodash: - specifier: 4.17.21 - version: 4.17.21 lucide-react: specifier: 0.542.0 version: 0.542.0(react@18.3.1) @@ -2662,9 +2656,6 @@ packages: '@types/jsonfile@6.1.4': resolution: {integrity: sha512-D5qGUYwjvnNNextdU59/+fI+spnwtTFmyQP0h+PfIOSkNfpU6AOICUOkm4i0OnSk+NyjdPJrxCDro0sJsWlRpQ==} - '@types/lodash@4.17.20': - resolution: {integrity: sha512-H3MHACvFUEiujabxhaI/ImO6gUrd8oOurg7LQtS7mbwIXA/cUqWrvBsaeJ23aZEPk1TAYkurjfMbSELfoCXlGA==} - '@types/mdast@4.0.4': resolution: {integrity: sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==} @@ -7064,7 +7055,7 @@ snapshots: '@babel/traverse': 7.28.0 '@babel/types': 7.28.1 convert-source-map: 2.0.0 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 gensync: 1.0.0-beta.2 json5: 2.2.3 semver: 6.3.1 @@ -7232,7 +7223,7 @@ snapshots: '@babel/parser': 7.28.0 '@babel/template': 7.27.2 '@babel/types': 7.28.1 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 transitivePeerDependencies: - supports-color @@ -7489,7 +7480,7 @@ snapshots: '@eslint/config-array@0.21.0': dependencies: '@eslint/object-schema': 2.1.6 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 minimatch: 3.1.2 transitivePeerDependencies: - supports-color @@ -7503,7 +7494,7 @@ snapshots: '@eslint/eslintrc@3.3.1': dependencies: ajv: 6.12.6 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 espree: 10.4.0 globals: 14.0.0 ignore: 5.3.2 @@ -9425,8 +9416,6 @@ snapshots: dependencies: '@types/node': 20.11.25 - '@types/lodash@4.17.20': {} - '@types/mdast@4.0.4': dependencies: '@types/unist': 3.0.3 @@ -9502,7 +9491,7 @@ snapshots: '@typescript-eslint/types': 8.38.0 '@typescript-eslint/typescript-estree': 8.38.0(typescript@5.8.3) '@typescript-eslint/visitor-keys': 8.38.0 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 eslint: 9.31.0(jiti@2.4.2) typescript: 5.8.3 transitivePeerDependencies: @@ -9512,7 +9501,7 @@ snapshots: dependencies: 
'@typescript-eslint/tsconfig-utils': 8.38.0(typescript@5.8.3) '@typescript-eslint/types': 8.38.0 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 typescript: 5.8.3 transitivePeerDependencies: - supports-color @@ -9531,7 +9520,7 @@ snapshots: '@typescript-eslint/types': 8.38.0 '@typescript-eslint/typescript-estree': 8.38.0(typescript@5.8.3) '@typescript-eslint/utils': 8.38.0(eslint@9.31.0(jiti@2.4.2))(typescript@5.8.3) - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 eslint: 9.31.0(jiti@2.4.2) ts-api-utils: 2.1.0(typescript@5.8.3) typescript: 5.8.3 @@ -9546,7 +9535,7 @@ snapshots: '@typescript-eslint/tsconfig-utils': 8.38.0(typescript@5.8.3) '@typescript-eslint/types': 8.38.0 '@typescript-eslint/visitor-keys': 8.38.0 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 fast-glob: 3.3.3 is-glob: 4.0.3 minimatch: 9.0.5 @@ -10644,6 +10633,10 @@ snapshots: de-indent@1.0.2: {} + debug@4.4.1: + dependencies: + ms: 2.1.3 + debug@4.4.1(supports-color@8.1.1): dependencies: ms: 2.1.3 @@ -10916,7 +10909,7 @@ snapshots: esbuild-register@3.6.0(esbuild@0.25.8): dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 esbuild: 0.25.8 transitivePeerDependencies: - supports-color @@ -10999,7 +10992,7 @@ snapshots: ajv: 6.12.6 chalk: 4.1.2 cross-spawn: 7.0.6 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 escape-string-regexp: 4.0.0 eslint-scope: 8.4.0 eslint-visitor-keys: 4.2.1 @@ -11410,7 +11403,7 @@ snapshots: http-proxy-agent@7.0.2: dependencies: agent-base: 7.1.4 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 transitivePeerDependencies: - supports-color @@ -11419,7 +11412,7 @@ snapshots: https-proxy-agent@7.0.6: dependencies: agent-base: 7.1.4 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 transitivePeerDependencies: - supports-color @@ -13992,7 +13985,7 @@ snapshots: vite-node@3.2.4(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0): dependencies: cac: 6.7.14 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 es-module-lexer: 1.7.0 pathe: 2.0.3 vite: 6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0) @@ -14042,7 +14035,7 @@ snapshots: '@volar/typescript': 2.4.23 '@vue/language-core': 2.2.0(typescript@5.8.3) compare-versions: 6.1.1 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 kolorist: 1.8.0 local-pkg: 1.1.1 magic-string: 0.30.17 @@ -14108,7 +14101,7 @@ snapshots: '@vitest/spy': 3.2.4 '@vitest/utils': 3.2.4 chai: 5.2.1 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 expect-type: 1.2.2 magic-string: 0.30.17 pathe: 2.0.3 diff --git a/web/common/package.json b/web/common/package.json index ef91337174..6a0965f19e 100644 --- a/web/common/package.json +++ b/web/common/package.json @@ -14,7 +14,6 @@ "@testing-library/react": "16.3.0", "@testing-library/user-event": "14.6.1", "@types/dagre": "0.7.53", - "@types/lodash": "4.17.20", "@types/node": "20.11.25", "@types/react": "18.3.23", "@types/react-dom": "18.3.7", @@ -34,7 +33,6 @@ "eslint-plugin-storybook": "9.1.5", "fuse.js": "7.1.0", "globals": "16.3.0", - "lodash": "4.17.21", "lucide-react": "0.542.0", "playwright": "1.54.1", "postcss": "8.5.6", @@ -95,7 +93,6 @@ "dagre": "0.8.5", "deepmerge": "4.3.1", "fuse.js": "7.1.0", - "lodash": "4.17.21", "lucide-react": "0.542.0", "react": "18.3.1", "react-dom": "18.3.1", diff --git a/web/common/src/components/Lineage/LineageColumnLevel/ColumnLevelLineageContext.ts b/web/common/src/components/Lineage/LineageColumnLevel/ColumnLevelLineageContext.ts index 227fc70394..4dd6ca93ef 100644 --- 
a/web/common/src/components/Lineage/LineageColumnLevel/ColumnLevelLineageContext.ts +++ b/web/common/src/components/Lineage/LineageColumnLevel/ColumnLevelLineageContext.ts @@ -2,64 +2,36 @@ import React from 'react' import { type PortId } from '../utils' -export type LineageColumn = { - source?: string | null - expression?: string | null - models: Record -} - -export type ColumnLevelModelConnections< - TAdjacencyListKey extends string, - TAdjacencyListColumnKey extends string, -> = Record -export type ColumnLevelDetails< - TAdjacencyListKey extends string, - TAdjacencyListColumnKey extends string, -> = Omit & { - models: ColumnLevelModelConnections< - TAdjacencyListKey, - TAdjacencyListColumnKey - > -} -export type ColumnLevelConnections< - TAdjacencyListKey extends string, - TAdjacencyListColumnKey extends string, -> = Record< - TAdjacencyListColumnKey, - ColumnLevelDetails -> export type ColumnLevelLineageAdjacencyList< TAdjacencyListKey extends string, TAdjacencyListColumnKey extends string, -> = Record< - TAdjacencyListKey, - ColumnLevelConnections -> +> = { + [K in TAdjacencyListKey]: { + [C in TAdjacencyListColumnKey]: { + source?: string | null + expression?: string | null + models: Record + } + } +} export type ColumnLevelLineageContextValue< TAdjacencyListKey extends string, TAdjacencyListColumnKey extends string, TColumnID extends string = PortId, -> = { - adjacencyListColumnLevel: ColumnLevelLineageAdjacencyList< + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< TAdjacencyListKey, TAdjacencyListColumnKey - > + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, +> = { + adjacencyListColumnLevel: TColumnLevelLineageAdjacencyList selectedColumns: Set - columnLevelLineage: Map< - TColumnID, - ColumnLevelLineageAdjacencyList - > + columnLevelLineage: Map setColumnLevelLineage: React.Dispatch< - React.SetStateAction< - Map< - TColumnID, - ColumnLevelLineageAdjacencyList< - TAdjacencyListKey, - TAdjacencyListColumnKey - > - > - > + React.SetStateAction> > showColumns: boolean setShowColumns: React.Dispatch> @@ -71,16 +43,17 @@ export function getColumnLevelLineageContextInitial< TAdjacencyListKey extends string, TAdjacencyListColumnKey extends string, TColumnID extends string = PortId, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, >() { return { - adjacencyListColumnLevel: {}, - columnLevelLineage: new Map< - TColumnID, - ColumnLevelLineageAdjacencyList< - TAdjacencyListKey, - TAdjacencyListColumnKey - > - >(), + adjacencyListColumnLevel: {} as TColumnLevelLineageAdjacencyList, + columnLevelLineage: new Map(), setColumnLevelLineage: () => {}, showColumns: false, setShowColumns: () => {}, @@ -94,8 +67,16 @@ export type ColumnLevelLineageContextHook< TAdjacencyListKey extends string, TAdjacencyListColumnKey extends string, TColumnID extends string = PortId, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, > = () => ColumnLevelLineageContextValue< TAdjacencyListKey, TAdjacencyListColumnKey, - TColumnID + TColumnID, + TColumnLevelLineageAdjacencyList > diff --git a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx 
b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx index 90def0f5ea..350437c16e 100644 --- a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx +++ b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx @@ -10,7 +10,7 @@ import React from 'react' import { cn } from '@/utils' import { NodeBadge } from '../node/NodeBadge' import { NodePort } from '../node/NodePort' -import { type NodeId, type PortId } from '../utils' +import { type NodeId, type PortHandleId, type PortId } from '../utils' import { type ColumnLevelLineageAdjacencyList, type ColumnLevelLineageContextHook, @@ -28,11 +28,21 @@ export function FactoryColumn< TAdjacencyListColumnKey extends string, TNodeID extends string = NodeId, TColumnID extends string = PortId, + TLeftPortHandleId extends string = PortHandleId, + TRightPortHandleId extends string = PortHandleId, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, >( useLineage: ColumnLevelLineageContextHook< TAdjacencyListKey, TAdjacencyListColumnKey, - TColumnID + TColumnID, + TColumnLevelLineageAdjacencyList >, ) { return React.memo(function FactoryColumn({ @@ -59,10 +69,7 @@ export function FactoryColumn< type: string description?: string | null className?: string - data?: ColumnLevelLineageAdjacencyList< - TAdjacencyListKey, - TAdjacencyListColumnKey - > + data?: TColumnLevelLineageAdjacencyList isFetching?: boolean error?: Error | null renderError?: (error: Error) => React.ReactNode @@ -248,7 +255,7 @@ export function FactoryColumn< } return isSelectedColumn ? ( - id={id} nodeId={nodeId} className={cn( diff --git a/web/common/src/components/Lineage/LineageColumnLevel/help.ts b/web/common/src/components/Lineage/LineageColumnLevel/help.ts index 30115450cd..fe75ed162a 100644 --- a/web/common/src/components/Lineage/LineageColumnLevel/help.ts +++ b/web/common/src/components/Lineage/LineageColumnLevel/help.ts @@ -9,28 +9,26 @@ import { type PortId, type TransformEdgeFn, } from '../utils' -import { - type ColumnLevelConnections, - type ColumnLevelDetails, - type ColumnLevelLineageAdjacencyList, -} from './ColumnLevelLineageContext' +import { type ColumnLevelLineageAdjacencyList } from './ColumnLevelLineageContext' export const MAX_COLUMNS_TO_DISPLAY = 5 export function getAdjacencyListKeysFromColumnLineage< TAdjacencyListKey extends string, TAdjacencyListColumnKey extends string, ->( - columnLineage: ColumnLevelLineageAdjacencyList< + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< TAdjacencyListKey, TAdjacencyListColumnKey >, -) { +>(columnLineage: TColumnLevelLineageAdjacencyList) { const adjacencyListKeys = new Set() const targets = Object.entries(columnLineage) as [ TAdjacencyListKey, - ColumnLevelConnections, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey], ][] for (const [sourceModelName, targetColumns] of targets) { @@ -38,7 +36,7 @@ export function getAdjacencyListKeysFromColumnLineage< const targetConnections = Object.entries(targetColumns) as [ TAdjacencyListColumnKey, - ColumnLevelDetails, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey][TAdjacencyListColumnKey], ][] for (const [, { models: sourceModels }] of targetConnections) { @@ -58,32 +56,52 @@ export function getEdgesFromColumnLineage< TAdjacencyListColumnKey extends string, 
TEdgeData extends LineageEdgeData = LineageEdgeData, TEdgeID extends string = EdgeId, - TNodeID extends string = NodeId, - TPortID extends string = PortId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, >({ columnLineage, transformEdge, }: { - columnLineage: ColumnLevelLineageAdjacencyList< - TAdjacencyListKey, - TAdjacencyListColumnKey + columnLineage: TColumnLevelLineageAdjacencyList + transformEdge: TransformEdgeFn< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID > - transformEdge: TransformEdgeFn }) { - const edges: LineageEdge[] = [] - const modelLevelEdgeIDs = new Map() + const edges: LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[] = [] + const modelLevelEdgeIDs = new Map() const targets = Object.entries(columnLineage || {}) as [ TAdjacencyListKey, - ColumnLevelConnections, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey], ][] for (const [targetModelName, targetColumns] of targets) { const targetConnections = Object.entries(targetColumns) as [ TAdjacencyListColumnKey, - ColumnLevelDetails, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey][TAdjacencyListColumnKey], ][] - const targetNodeId = toNodeID(targetModelName) + const targetNodeId = toNodeID(targetModelName) for (const [ targetColumnName, @@ -95,7 +113,7 @@ export function getEdgesFromColumnLineage< ][] for (const [sourceModelName, sourceColumns] of sources) { - const sourceNodeId = toNodeID(sourceModelName) + const sourceNodeId = toNodeID(sourceModelName) modelLevelEdgeIDs.set( toEdgeID(sourceModelName, targetModelName), @@ -109,11 +127,11 @@ export function getEdgesFromColumnLineage< targetModelName, targetColumnName, ) - const sourceColumnId = toPortID( + const sourceColumnId = toPortID( sourceModelName, sourceColumnName, ) - const targetColumnId = toPortID( + const targetColumnId = toPortID( targetModelName, targetColumnName, ) @@ -145,22 +163,24 @@ export function getConnectedColumnsIDs< TAdjacencyListKey extends string, TAdjacencyListColumnKey extends string, TColumnID extends string = PortId, ->( - adjacencyList: ColumnLevelLineageAdjacencyList< + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< TAdjacencyListKey, TAdjacencyListColumnKey >, -) { +>(adjacencyList: TColumnLevelLineageAdjacencyList) { const connectedColumns = new Set() const targets = Object.entries(adjacencyList) as [ TAdjacencyListKey, - ColumnLevelConnections, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey], ][] for (const [sourceModelName, targetColumns] of targets) { const targetConnections = Object.entries(targetColumns) as [ TAdjacencyListColumnKey, - ColumnLevelDetails, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey][TAdjacencyListColumnKey], ][] for (const [ diff --git a/web/common/src/components/Lineage/LineageColumnLevel/useColumnLevelLineage.ts b/web/common/src/components/Lineage/LineageColumnLevel/useColumnLevelLineage.ts index da1a6b8ee8..53032c2c12 100644 --- a/web/common/src/components/Lineage/LineageColumnLevel/useColumnLevelLineage.ts +++ 
b/web/common/src/components/Lineage/LineageColumnLevel/useColumnLevelLineage.ts @@ -12,19 +12,18 @@ export function useColumnLevelLineage< TAdjacencyListKey extends string, TAdjacencyListColumnKey extends string, TColumnID extends string = PortId, ->( - columnLevelLineage: Map< - TColumnID, - ColumnLevelLineageAdjacencyList + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey >, -) { +>(columnLevelLineage: Map) { const adjacencyListColumnLevel = React.useMemo(() => { return merge.all(Array.from(columnLevelLineage.values()), { arrayMerge: (dest, source) => Array.from(new Set([...dest, ...source])), - }) as ColumnLevelLineageAdjacencyList< - TAdjacencyListKey, - TAdjacencyListColumnKey - > + }) as TColumnLevelLineageAdjacencyList }, [columnLevelLineage]) const selectedColumns = React.useMemo(() => { @@ -37,7 +36,11 @@ export function useColumnLevelLineage< const adjacencyListKeysColumnLevel = React.useMemo(() => { return adjacencyListColumnLevel != null - ? getAdjacencyListKeysFromColumnLineage(adjacencyListColumnLevel) + ? getAdjacencyListKeysFromColumnLineage< + TAdjacencyListKey, + TAdjacencyListColumnKey, + TColumnLevelLineageAdjacencyList + >(adjacencyListColumnLevel) : [] }, [adjacencyListColumnLevel]) diff --git a/web/common/src/components/Lineage/LineageContext.ts b/web/common/src/components/Lineage/LineageContext.ts index 9da54dcbee..4a90031217 100644 --- a/web/common/src/components/Lineage/LineageContext.ts +++ b/web/common/src/components/Lineage/LineageContext.ts @@ -17,7 +17,10 @@ export interface LineageContextValue< TEdgeData extends LineageEdgeData = LineageEdgeData, TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, > { // Node selection showOnlySelectedNodes: boolean @@ -34,9 +37,25 @@ export interface LineageContextValue< setZoom: React.Dispatch> // Nodes and Edges - edges: LineageEdge[] + edges: LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[] setEdges: React.Dispatch< - React.SetStateAction[]> + React.SetStateAction< + LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[] + > > nodes: LineageNode[] nodesMap: LineageNodesMap @@ -73,22 +92,49 @@ export type LineageContextHook< TEdgeData extends LineageEdgeData = LineageEdgeData, TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, -> = () => LineageContextValue + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +> = () => LineageContextValue< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID +> export function createLineageContext< TNodeData extends LineageNodeData = LineageNodeData, TEdgeData extends LineageEdgeData = LineageEdgeData, TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, TLineageContextValue extends 
LineageContextValue< TNodeData, TEdgeData, TNodeID, TEdgeID, - TPortID - > = LineageContextValue, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > = LineageContextValue< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >, >(initial: TLineageContextValue) { const LineageContext = React.createContext(initial) diff --git a/web/common/src/components/Lineage/LineageLayout.tsx b/web/common/src/components/Lineage/LineageLayout.tsx index 2ab4a34879..a9b5ec512f 100644 --- a/web/common/src/components/Lineage/LineageLayout.tsx +++ b/web/common/src/components/Lineage/LineageLayout.tsx @@ -26,7 +26,10 @@ export function LineageLayout< TEdgeData extends LineageEdgeData = LineageEdgeData, TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, >({ nodeTypes, edgeTypes, @@ -44,7 +47,10 @@ export function LineageLayout< TEdgeData, TNodeID, TEdgeID, - TPortID + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID > isBuildingLayout?: boolean nodeTypes?: NodeTypes diff --git a/web/common/src/components/Lineage/LineageLayoutBase.tsx b/web/common/src/components/Lineage/LineageLayoutBase.tsx index a21c1bac17..6d3975d19a 100644 --- a/web/common/src/components/Lineage/LineageLayoutBase.tsx +++ b/web/common/src/components/Lineage/LineageLayoutBase.tsx @@ -20,7 +20,6 @@ import { import '@xyflow/react/dist/style.css' import './Lineage.css' -import { debounce } from 'lodash' import { CircuitBoard, Crosshair, LocateFixed, RotateCcw } from 'lucide-react' import React from 'react' @@ -39,8 +38,8 @@ import { NODES_TRESHOLD_ZOOM, type NodeId, type EdgeId, - ZOOM_THRESHOLD, type PortId, + ZOOM_THRESHOLD, } from './utils' import '@xyflow/react/dist/style.css' @@ -50,9 +49,12 @@ import { cn } from '@/utils' export function LineageLayoutBase< TNodeData extends LineageNodeData = LineageNodeData, TEdgeData extends LineageEdgeData = LineageEdgeData, - TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TNodeID extends string = NodeId, + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, >({ nodeTypes, edgeTypes, @@ -69,7 +71,10 @@ export function LineageLayoutBase< TEdgeData, TNodeID, TEdgeID, - TPortID + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID > nodesDraggable?: boolean nodesConnectable?: boolean @@ -106,8 +111,19 @@ export function LineageLayoutBase< setSelectedEdges, } = useLineage() - const [nodes, setNodes] = React.useState(initialNodes) - const [edges, setEdges] = React.useState(initialEdges) + const [nodes, setNodes] = React.useState[]>( + [], + ) + const [edges, setEdges] = React.useState< + LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[] + >([]) const onNodesChange = React.useCallback( (changes: NodeChange>[]) => { @@ -120,13 +136,28 @@ export function LineageLayoutBase< const onEdgesChange = React.useCallback( ( - changes: EdgeChange>[], + changes: EdgeChange< + LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > + >[], ) => { setEdges( - applyEdgeChanges>( - changes, - edges, - ), + applyEdgeChanges< + LineageEdge< + TEdgeData, + TEdgeID, + 
TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > + >(changes, edges), ) }, [edges, setEdges], @@ -235,12 +266,23 @@ export function LineageLayoutBase< const connectedEdges = getConnectedEdges< LineageNode, - LineageEdge + LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > >(connectedNodes, edges) const selectedNodes = new Set(connectedNodes.map(node => node.id)) const selectedEdges = new Set( connectedEdges.reduce((acc, edge) => { - if ([edge.source, edge.target].every(id => selectedNodes.has(id))) { + if ( + [edge.source, edge.target].every(id => + selectedNodes.has(id as unknown as TNodeID), + ) + ) { edge.zIndex = 2 acc.add(edge.id) } else { @@ -278,7 +320,14 @@ export function LineageLayoutBase< return ( , - LineageEdge + LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > > className={cn('shrink-0', className)} nodes={nodes} @@ -351,3 +400,11 @@ export function LineageLayoutBase< ) } + +function debounce(func: T, wait: number) { + let timeout: NodeJS.Timeout + return (...args: unknown[]) => { + clearTimeout(timeout) + timeout = setTimeout(() => func(...args), wait) + } +} diff --git a/web/common/src/components/Lineage/edge/FactoryEdgeWithGradient.tsx b/web/common/src/components/Lineage/edge/FactoryEdgeWithGradient.tsx index a89027ffef..aee8790b35 100644 --- a/web/common/src/components/Lineage/edge/FactoryEdgeWithGradient.tsx +++ b/web/common/src/components/Lineage/edge/FactoryEdgeWithGradient.tsx @@ -15,14 +15,20 @@ export function FactoryEdgeWithGradient< TEdgeData extends EdgeData = EdgeData, TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, >( useLineage: LineageContextHook< TNodeData, TEdgeData, TNodeID, TEdgeID, - TPortID + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID >, ) { return React.memo(({ data, id, ...props }: EdgeProps>) => { diff --git a/web/common/src/components/Lineage/help.ts b/web/common/src/components/Lineage/help.ts index 97f4ad9542..e8041d9f56 100644 --- a/web/common/src/components/Lineage/help.ts +++ b/web/common/src/components/Lineage/help.ts @@ -60,13 +60,22 @@ export function getTransformedNodes< export function getTransformedModelEdgesSourceTargets< TAdjacencyListKey extends string, TEdgeData extends LineageEdgeData = LineageEdgeData, - TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, >( adjacencyListKeys: TAdjacencyListKey[], lineageAdjacencyList: LineageAdjacencyList, - transformEdge: TransformEdgeFn, + transformEdge: TransformEdgeFn< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >, ) { const nodesCount = adjacencyListKeys.length @@ -76,7 +85,7 @@ export function getTransformedModelEdgesSourceTargets< for (let i = 0; i < nodesCount; i++) { const sourceAdjacencyListKey = adjacencyListKeys[i] - const sourceNodeId = toNodeID(sourceAdjacencyListKey) + const sourceNodeId = toNodeID(sourceAdjacencyListKey) const targets = lineageAdjacencyList[sourceAdjacencyListKey] const targetsCount = targets?.length || 0 @@ -91,7 +100,7 @@ export function 
getTransformedModelEdgesSourceTargets< sourceAdjacencyListKey, targetAdjacencyListKey, ) - const targetNodeId = toNodeID(targetAdjacencyListKey) + const targetNodeId = toNodeID(targetAdjacencyListKey) edges.push(transformEdge('edge', edgeId, sourceNodeId, targetNodeId)) } @@ -103,13 +112,22 @@ export function getTransformedModelEdgesSourceTargets< export function getTransformedModelEdgesTargetSources< TAdjacencyListKey extends string, TEdgeData extends LineageEdgeData = LineageEdgeData, - TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, >( adjacencyListKeys: TAdjacencyListKey[], lineageAdjacencyList: LineageAdjacencyList, - transformEdge: TransformEdgeFn, + transformEdge: TransformEdgeFn< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >, ) { const nodesCount = adjacencyListKeys.length @@ -119,7 +137,7 @@ export function getTransformedModelEdgesTargetSources< for (let i = 0; i < nodesCount; i++) { const targetAdjacencyListKey = adjacencyListKeys[i] - const targetNodeId = toNodeID(targetAdjacencyListKey) + const targetNodeId = toNodeID(targetAdjacencyListKey) const sources = lineageAdjacencyList[targetAdjacencyListKey] const sourcesCount = sources?.length || 0 @@ -134,7 +152,7 @@ export function getTransformedModelEdgesTargetSources< sourceAdjacencyListKey, targetAdjacencyListKey, ) - const sourceNodeId = toNodeID(sourceAdjacencyListKey) + const sourceNodeId = toNodeID(sourceAdjacencyListKey) edges.push(transformEdge('edge', edgeId, sourceNodeId, targetNodeId)) } @@ -206,18 +224,27 @@ export function calculateNodeDetailsHeight({ export function createEdge< TEdgeData extends LineageEdgeData = LineageEdgeData, - TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, >( type: string, edgeId: TEdgeID, - sourceId: TNodeID, - targetId: TNodeID, - sourceHandleId?: TPortID, - targetHandleId?: TPortID, + sourceId: TSourceID, + targetId: TTargetID, + sourceHandleId?: TSourceHandleID, + targetHandleId?: TTargetHandleID, data?: TEdgeData, -): LineageEdge { +): LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID +> { return { id: edgeId, source: sourceId, diff --git a/web/common/src/components/Lineage/layout/dagreLayout.ts b/web/common/src/components/Lineage/layout/dagreLayout.ts index 83714a2220..554d427f03 100644 --- a/web/common/src/components/Lineage/layout/dagreLayout.ts +++ b/web/common/src/components/Lineage/layout/dagreLayout.ts @@ -13,14 +13,23 @@ import dagre from 'dagre' export function buildLayout< TNodeData extends LineageNodeData = LineageNodeData, TEdgeData extends LineageEdgeData = LineageEdgeData, - TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, >({ edges, nodesMap, }: { - edges: LineageEdge[] + edges: LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[] nodesMap: LineageNodesMap }) { const nodes = Object.values(nodesMap) diff 
--git a/web/common/src/components/Lineage/layout/help.ts b/web/common/src/components/Lineage/layout/help.ts index 91b3ebc4a3..d0dada83f5 100644 --- a/web/common/src/components/Lineage/layout/help.ts +++ b/web/common/src/components/Lineage/layout/help.ts @@ -24,14 +24,33 @@ export function getWorker(url: URL): Worker { export async function getLayoutedGraph< TNodeData extends LineageNodeData = LineageNodeData, TEdgeData extends LineageEdgeData = LineageEdgeData, - TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, >( - edges: LineageEdge[], + edges: LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[], nodesMap: LineageNodesMap, workerUrl: URL, -): Promise> { +): Promise< + LayoutedGraph< + TNodeData, + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > +> { let timeoutId: NodeJS.Timeout | null = null return new Promise((resolve, reject) => { @@ -56,9 +75,11 @@ export async function getLayoutedGraph< worker.postMessage({ edges, nodesMap } as LayoutedGraph< TNodeData, TEdgeData, - TNodeID, TEdgeID, - TPortID + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID >) } catch (postError) { errorHandler(postError as ErrorEvent) @@ -66,7 +87,15 @@ export async function getLayoutedGraph< function handler( event: MessageEvent< - LayoutedGraph & { + LayoutedGraph< + TNodeData, + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > & { error: ErrorEvent } >, diff --git a/web/common/src/components/Lineage/node/NodeHandle.tsx b/web/common/src/components/Lineage/node/NodeHandle.tsx index 4bfbfa6181..d50d90422a 100644 --- a/web/common/src/components/Lineage/node/NodeHandle.tsx +++ b/web/common/src/components/Lineage/node/NodeHandle.tsx @@ -3,8 +3,9 @@ import React from 'react' import { cn } from '@/utils' import { BaseHandle } from './base-handle' +import type { HandleId } from '../utils' -export const NodeHandle = React.memo(function NodeHandle({ +export function NodeHandle({ type, id, children, @@ -12,7 +13,7 @@ export const NodeHandle = React.memo(function NodeHandle({ ...props }: { type: 'target' | 'source' - id: string + id: THandleId children: React.ReactNode className?: string }) { @@ -29,4 +30,4 @@ export const NodeHandle = React.memo(function NodeHandle({ {children} ) -}) +} diff --git a/web/common/src/components/Lineage/node/NodeHandles.tsx b/web/common/src/components/Lineage/node/NodeHandles.tsx index 71bee716b4..453ff74317 100644 --- a/web/common/src/components/Lineage/node/NodeHandles.tsx +++ b/web/common/src/components/Lineage/node/NodeHandles.tsx @@ -3,8 +3,12 @@ import React from 'react' import { cn } from '@/utils' import { HorizontalContainer } from '@/components/HorizontalContainer/HorizontalContainer' import { NodeHandle } from './NodeHandle' +import type { HandleId } from '../utils' -export const NodeHandles = React.memo(function NodeHandles({ +export function NodeHandles< + TLeftHandleId extends string = HandleId, + TRightHandleId extends string = HandleId, +>({ leftIcon, rightIcon, leftId, @@ -13,8 +17,8 @@ export const NodeHandles = React.memo(function NodeHandles({ handleClassName, children, }: { - leftId?: string - rightId?: string + leftId?: TLeftHandleId + rightId?: TRightHandleId className?: string handleClassName?: string children: 
React.ReactNode @@ -27,7 +31,7 @@ export const NodeHandles = React.memo(function NodeHandles({ data-component="NodeHandles" > {leftId && ( - type="target" id={leftId} className={cn('left-0', handleClassName)} @@ -37,7 +41,7 @@ export const NodeHandles = React.memo(function NodeHandles({ )} {children} {rightId && ( - type="source" id={rightId} className={cn('right-0', handleClassName)} @@ -47,4 +51,4 @@ export const NodeHandles = React.memo(function NodeHandles({ )} ) -}) +} diff --git a/web/common/src/components/Lineage/node/NodePort.tsx b/web/common/src/components/Lineage/node/NodePort.tsx index b961d4e01a..7380716f02 100644 --- a/web/common/src/components/Lineage/node/NodePort.tsx +++ b/web/common/src/components/Lineage/node/NodePort.tsx @@ -2,12 +2,14 @@ import { useNodeConnections, useUpdateNodeInternals } from '@xyflow/react' import React from 'react' import { cn } from '@/utils' -import { type NodeId, type PortId } from '../utils' +import { type NodeId, type PortHandleId } from '../utils' import { NodeHandles } from './NodeHandles' -export const NodePort = React.memo(function NodePort< - TPortId extends string = PortId, +export function NodePort< + TPortId extends string = PortHandleId, TNodeID extends string = NodeId, + TLeftPortHandleId extends string = PortHandleId, + TRightPortHandleId extends string = PortHandleId, >({ id, nodeId, @@ -32,8 +34,16 @@ export const NodePort = React.memo(function NodePort< handleId: id, }) - const leftId = targets.length > 0 ? id : undefined - const rightId = sources.length > 0 ? id : undefined + const isLeftHandleId = (id: TPortId): id is TPortId & TLeftPortHandleId => { + return id && targets.length > 0 + } + + const isRightHandleId = (id: TPortId): id is TPortId & TRightPortHandleId => { + return id && sources.length > 0 + } + + const leftId = isLeftHandleId(id) ? id : undefined + const rightId = isRightHandleId(id) ? 
id : undefined React.useEffect(() => { if (leftId || rightId) { @@ -42,7 +52,7 @@ export const NodePort = React.memo(function NodePort< }, [updateNodeInternals, nodeId, leftId, rightId]) return ( - data-component="NodePort" leftIcon={ @@ -61,4 +71,4 @@ export const NodePort = React.memo(function NodePort< {children} ) -}) +} diff --git a/web/common/src/components/Lineage/node/base-handle.tsx b/web/common/src/components/Lineage/node/base-handle.tsx index 76d66bdeaf..e6b8f0c24b 100644 --- a/web/common/src/components/Lineage/node/base-handle.tsx +++ b/web/common/src/components/Lineage/node/base-handle.tsx @@ -16,7 +16,6 @@ export const BaseHandle: ForwardRefExoticComponent< 'fixed flex justify-center items-center border-none transition', className, )} - {...props} > {children} diff --git a/web/common/src/components/Lineage/stories/Lineage.stories.tsx b/web/common/src/components/Lineage/stories/Lineage.stories.tsx index 6e16bed61e..115be3c2c0 100644 --- a/web/common/src/components/Lineage/stories/Lineage.stories.tsx +++ b/web/common/src/components/Lineage/stories/Lineage.stories.tsx @@ -1,12 +1,125 @@ import type { LineageAdjacencyList, LineageDetails } from '../utils' import { ModelLineage } from './ModelLineage' -import type { ModelLineageNodeDetails, ModelName } from './ModelLineageContext' +import type { + BrandedLineageAdjacencyList, + BrandedLineageDetails, + ModelLineageNodeDetails, + ModelName, +} from './ModelLineageContext' export default { title: 'Components/Lineage', } +const adjacencyList = { + 'sqlmesh.sushi.raw_orders': ['sqlmesh.sushi.orders'], + 'sqlmesh.sushi.orders': [], +} as Record + +const lineageDetails = { + 'sqlmesh.sushi.raw_orders': { + name: 'sqlmesh.sushi.raw_orders', + display_name: 'sushi.raw_orders', + identifier: '123456789', + version: '123456789', + dialect: 'bigquery', + cron: '0 0 * * *', + owner: 'admin', + kind: 'INCREMENTAL_BY_TIME', + model_type: 'python', + tags: ['test', 'tag', 'another tag'], + columns: { + user_id: { + data_type: 'STRING', + description: 'node', + }, + event_id: { + data_type: 'STRING', + description: 'node', + }, + created_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + }, + }, + 'sqlmesh.sushi.orders': { + name: 'sqlmesh.sushi.orders', + display_name: 'sushi.orders', + identifier: '123456789', + version: '123456789', + dialect: 'bigquery', + cron: '0 0 * * *', + owner: 'admin', + kind: 'INCREMENTAL_BY_TIME', + model_type: 'sql', + tags: ['test', 'tag', 'another tag'], + columns: { + user_id: { + data_type: 'STRING', + description: 'node', + columnLineageData: { + 'sqlmesh.sushi.orders': { + user_id: { + source: 'sqlmesh.sushi.raw_orders', + expression: 'select user_id from sqlmesh.sushi.raw_orders', + models: { + 'sqlmesh.sushi.raw_orders': ['user_id'], + }, + }, + }, + }, + }, + event_id: { + data_type: 'STRING', + description: 'node', + columnLineageData: { + 'sqlmesh.sushi.orders': { + event_id: { + models: { + 'sqlmesh.sushi.raw_orders': ['event_id'], + }, + }, + }, + }, + }, + product_id: { + data_type: 'STRING', + description: 'node', + }, + customer_id: { + data_type: 'STRING', + description: 'node', + }, + updated_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + deleted_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + expired_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + start_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + end_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + created_ts: { + data_type: 'TIMESTAMP', + description: 'node', + 
}, + }, + }, +} as Record + export const LineageModel = () => { return (
{ `} - } - lineageDetails={ - { - 'sqlmesh.sushi.raw_orders': { - name: 'sqlmesh.sushi.raw_orders', - display_name: 'sushi.raw_orders', - identifier: '123456789', - version: '123456789', - dialect: 'bigquery', - cron: '0 0 * * *', - owner: 'admin', - kind: 'INCREMENTAL_BY_TIME', - model_type: 'python', - tags: ['test', 'tag', 'another tag'], - columns: { - user_id: { - data_type: 'STRING', - description: 'node', - }, - event_id: { - data_type: 'STRING', - description: 'node', - }, - created_at: { - data_type: 'TIMESTAMP', - description: 'node', - }, - }, - }, - 'sqlmesh.sushi.orders': { - name: 'sqlmesh.sushi.orders', - display_name: 'sushi.orders', - identifier: '123456789', - version: '123456789', - dialect: 'bigquery', - cron: '0 0 * * *', - owner: 'admin', - kind: 'INCREMENTAL_BY_TIME', - model_type: 'sql', - tags: ['test', 'tag', 'another tag'], - columns: { - user_id: { - data_type: 'STRING', - description: 'node', - columnLineageData: { - 'sqlmesh.sushi.orders': { - user_id: { - source: 'sqlmesh.sushi.raw_orders', - expression: - 'select user_id from sqlmesh.sushi.raw_orders', - models: { - 'sqlmesh.sushi.raw_orders': ['user_id'], - }, - }, - }, - }, - }, - event_id: { - data_type: 'STRING', - description: 'node', - columnLineageData: { - 'sqlmesh.sushi.orders': { - event_id: { - models: { - 'sqlmesh.sushi.raw_orders': ['event_id'], - }, - }, - }, - }, - }, - product_id: { - data_type: 'STRING', - description: 'node', - }, - customer_id: { - data_type: 'STRING', - description: 'node', - }, - updated_at: { - data_type: 'TIMESTAMP', - description: 'node', - }, - deleted_at: { - data_type: 'TIMESTAMP', - description: 'node', - }, - expired_at: { - data_type: 'TIMESTAMP', - description: 'node', - }, - start_at: { - data_type: 'TIMESTAMP', - description: 'node', - }, - end_at: { - data_type: 'TIMESTAMP', - description: 'node', - }, - created_ts: { - data_type: 'TIMESTAMP', - description: 'node', - }, - }, - }, - } as LineageDetails - } + adjacencyList={adjacencyList as BrandedLineageAdjacencyList} + lineageDetails={lineageDetails as BrandedLineageDetails} className="rounded-2xl" />
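The `as BrandedLineageAdjacencyList` and `as BrandedLineageDetails` casts above lean on the branding pattern this patch introduces in types.ts further down. As a minimal sketch of why the explicit casts are needed (ModelName and ColumnName match the real aliases; everything else here is illustrative only):

declare const __brand: unique symbol
type Branded<T, B> = T & { [__brand]: B }

type ModelName = Branded<string, 'ModelName'>
type ColumnName = Branded<string, 'ColumnName'>

// Both are plain strings at runtime; the brand only exists at compile time.
const model = 'sqlmesh.sushi.orders' as ModelName
const column = 'user_id' as ColumnName

// const oops: ModelName = column  // compile error: the brands differ

Object literals carry no brand of their own, so the story data above has to be cast explicitly before it satisfies the branded aliases.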
diff --git a/web/common/src/components/Lineage/stories/ModelLineage.tsx b/web/common/src/components/Lineage/stories/ModelLineage.tsx index 46d19f9758..3df85ea1a5 100644 --- a/web/common/src/components/Lineage/stories/ModelLineage.tsx +++ b/web/common/src/components/Lineage/stories/ModelLineage.tsx @@ -1,8 +1,6 @@ -import { debounce } from 'lodash' import { Focus, LockOpen, Rows2, Rows3, Lock } from 'lucide-react' import React from 'react' -import { type ColumnLevelLineageAdjacencyList } from '../LineageColumnLevel/ColumnLevelLineageContext' import { MAX_COLUMNS_TO_DISPLAY, calculateColumnsHeight, @@ -11,16 +9,8 @@ import { getEdgesFromColumnLineage, } from '../LineageColumnLevel/help' import { useColumnLevelLineage } from '../LineageColumnLevel/useColumnLevelLineage' -import { LineageControlButton } from '../LineageControlButton' -import { LineageControlIcon } from '../LineageControlIcon' import { LineageLayout } from '../LineageLayout' import { FactoryEdgeWithGradient } from '../edge/FactoryEdgeWithGradient' -import { - toNodeID, - toPortID, - type LineageAdjacencyList, - type LineageDetails, -} from '../utils' import { calculateNodeBaseHeight, calculateNodeDetailsHeight, @@ -33,6 +23,8 @@ import { import { type LineageEdge, type LineageNodesMap, + toNodeID, + toPortID, ZOOM_THRESHOLD, } from '../utils' import { @@ -47,11 +39,23 @@ import { type ModelColumnID, type ModelEdgeId, type NodeType, + type BrandedLineageAdjacencyList, + type BrandedLineageDetails, + type BrandedColumnLevelLineageAdjacencyList, + type ModelColumn, + type ModelDisplayName, + type LeftHandleId, + type RightHandleId, + type LeftPortHandleId, + type RightPortHandleId, } from './ModelLineageContext' import { ModelNode } from './ModelNode' import { getNodeTypeColorVar } from './help' import { EdgeWithGradient } from '../edge/EdgeWithGradient' import { cleanupLayoutWorker, getLayoutedGraph } from '../layout/help' +import { LineageControlButton } from '../LineageControlButton' +import { LineageControlIcon } from '../LineageControlIcon' +import type { BrandedRecord } from '@/types' const nodeTypes = { node: ModelNode, @@ -67,8 +71,8 @@ export const ModelLineage = ({ lineageDetails, className, }: { - adjacencyList: LineageAdjacencyList - lineageDetails: LineageDetails + adjacencyList: BrandedLineageAdjacencyList + lineageDetails: BrandedLineageDetails selectedModelName?: ModelName className?: string }) => { @@ -76,7 +80,14 @@ export const ModelLineage = ({ const [isBuildingLayout, setIsBuildingLayout] = React.useState(false) const [nodesDraggable, setNodesDraggable] = React.useState(false) const [edges, setEdges] = React.useState< - LineageEdge[] + LineageEdge< + EdgeData, + ModelEdgeId, + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId + >[] >([]) const [nodesMap, setNodesMap] = React.useState< LineageNodesMap @@ -94,7 +105,7 @@ export const ModelLineage = ({ const [showColumns, setShowColumns] = React.useState(false) const [columnLevelLineage, setColumnLevelLineage] = React.useState< - Map> + Map >(new Map()) const [fetchingColumns, setFetchingColumns] = React.useState< Set @@ -104,9 +115,12 @@ export const ModelLineage = ({ adjacencyListColumnLevel, selectedColumns, adjacencyListKeysColumnLevel, - } = useColumnLevelLineage( - columnLevelLineage, - ) + } = useColumnLevelLineage< + ModelName, + ColumnName, + ModelColumnID, + BrandedColumnLevelLineageAdjacencyList + >(columnLevelLineage) const adjacencyListKeys = React.useMemo(() => { let keys: ModelName[] = [] @@ -124,18 +138,18 @@ export const 
ModelLineage = ({ (nodeId: ModelNodeId, detail: ModelLineageNodeDetails) => { const columns = detail.columns - const node = createNode('node', nodeId, { - name: detail.name, + const node = createNode('node', nodeId, { + name: detail.name as ModelName, + displayName: detail.display_name as ModelDisplayName, identifier: detail.identifier, model_type: detail.model_type as NodeType, kind: detail.kind!, cron: detail.cron, - displayName: detail.display_name, owner: detail.owner!, dialect: detail.dialect, version: detail.version, tags: detail.tags || [], - columns, + columns: columns as BrandedRecord, }) const selectedColumnsCount = new Set( Object.keys(columns ?? {}).map(k => toPortID(detail.name, k)), @@ -184,10 +198,10 @@ export const ModelLineage = ({ ( edgeType: string, edgeId: ModelEdgeId, - sourceId: ModelNodeId, - targetId: ModelNodeId, - sourceHandleId?: ModelColumnID, - targetHandleId?: ModelColumnID, + sourceId: LeftHandleId, + targetId: RightHandleId, + sourceHandleId?: LeftPortHandleId, + targetHandleId?: RightPortHandleId, ) => { const sourceNode = transformedNodesMap[sourceId] const targetNode = transformedNodesMap[targetId] @@ -217,7 +231,14 @@ export const ModelLineage = ({ data.strokeWidth = 2 } - return createEdge( + return createEdge< + EdgeData, + ModelEdgeId, + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId + >( edgeType, edgeId, sourceId, @@ -237,8 +258,11 @@ export const ModelLineage = ({ ColumnName, EdgeData, ModelEdgeId, - ModelNodeId, - ModelColumnID + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId, + BrandedColumnLevelLineageAdjacencyList >({ columnLineage: adjacencyListColumnLevel, transformEdge, @@ -252,38 +276,45 @@ export const ModelLineage = ({ : getTransformedModelEdgesSourceTargets< ModelName, EdgeData, - ModelNodeId, ModelEdgeId, - ModelColumnID + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId >(adjacencyListKeys, adjacencyList, transformEdge) }, [adjacencyListKeys, adjacencyList, transformEdge, edgesColumnLevel]) - const calculateLayout = React.useMemo(() => { - return debounce( - ( - eds: LineageEdge[], - nds: LineageNodesMap, - ) => - getLayoutedGraph( - eds, - nds, - new URL('./dagreLayout.worker.ts', import.meta.url), - ) - .then(({ edges, nodesMap }) => { - setEdges(edges) - setNodesMap(nodesMap) - }) - .catch(error => { - console.error('Layout processing failed:', error) - setEdges([]) - setNodesMap({}) - }) - .finally(() => { - setIsBuildingLayout(false) - }), - 200, - ) - }, []) + const calculateLayout = React.useCallback( + ( + eds: LineageEdge< + EdgeData, + ModelEdgeId, + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId + >[], + nds: LineageNodesMap, + ) => + getLayoutedGraph( + eds, + nds, + new URL('./dagreLayout.worker.ts', import.meta.url), + ) + .then(({ edges, nodesMap }) => { + setEdges(edges) + setNodesMap(nodesMap) + }) + .catch(error => { + console.error('Layout processing failed:', error) + setEdges([]) + setNodesMap({}) + }) + .finally(() => { + setIsBuildingLayout(false) + }), + [setEdges, setNodesMap, setIsBuildingLayout], + ) const nodes = React.useMemo(() => { return Object.values(nodesMap) @@ -291,7 +322,7 @@ export const ModelLineage = ({ const currentNode = React.useMemo(() => { return selectedModelName - ? nodesMap[toNodeID(selectedModelName)] + ? 
nodesMap[toNodeID(selectedModelName)] : null }, [selectedModelName, nodesMap]) @@ -315,30 +346,13 @@ export const ModelLineage = ({ selectedNodes, ) const onlySelectedEdges = transformedEdges.filter(edge => - selectedEdges.has(edge.id as ModelEdgeId), + selectedEdges.has(edge.id), ) calculateLayout(onlySelectedEdges, onlySelectedNodesMap) } else { calculateLayout(transformedEdges, transformedNodesMap) } - }, [ - calculateLayout, - showOnlySelectedNodes, - transformedEdges, - transformedNodesMap, - ]) - - React.useEffect(() => { - const currentNodeId = selectedModelName - ? toNodeID(selectedModelName) - : undefined - - if (currentNodeId && currentNodeId in nodesMap) { - setSelectedNodeId(currentNodeId) - } else { - handleReset() - } - }, [handleReset, selectedModelName, nodesMap]) + }, [showOnlySelectedNodes, transformedEdges, transformedNodesMap]) // Cleanup worker on unmount React.useEffect(() => () => cleanupLayoutWorker(), []) @@ -381,7 +395,10 @@ export const ModelLineage = ({ EdgeData, ModelNodeId, ModelEdgeId, - ModelColumnID + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId > isBuildingLayout={isBuildingLayout} useLineage={useModelLineage} diff --git a/web/common/src/components/Lineage/stories/ModelLineageContext.ts b/web/common/src/components/Lineage/stories/ModelLineageContext.ts index 98d2131766..745d9c2636 100644 --- a/web/common/src/components/Lineage/stories/ModelLineageContext.ts +++ b/web/common/src/components/Lineage/stories/ModelLineageContext.ts @@ -1,4 +1,4 @@ -import type { Branded } from '@/types' +import type { Branded, BrandedRecord } from '@/types' import { type ColumnLevelLineageAdjacencyList, type ColumnLevelLineageContextValue, @@ -10,22 +10,49 @@ import { createLineageContext, getInitial as getLineageContextInitial, } from '../LineageContext' -import { type PathType } from '../utils' +import { + type LineageAdjacencyList, + type LineageDetails, + type PathType, +} from '../utils' export type ModelName = Branded +export type ModelDisplayName = Branded export type ColumnName = Branded export type ModelColumnID = Branded -export type ModelNodeId = Branded export type ModelEdgeId = Branded +export type LeftHandleId = Branded +export type RightHandleId = Branded +export type ModelNodeId = LeftHandleId | RightHandleId +export type LeftPortHandleId = Branded +export type RightPortHandleId = Branded + +export type BrandedColumnLevelLineageAdjacencyList = + ColumnLevelLineageAdjacencyList & { + readonly __adjacencyListKeyBrand: ModelName + readonly __adjacencyListColumnKeyBrand: ColumnName + } + +export type BrandedLineageAdjacencyList = LineageAdjacencyList & { + readonly __adjacencyListKeyBrand: ModelName +} + +export type BrandedLineageDetails = LineageDetails< + ModelName, + ModelLineageNodeDetails +> & { + readonly __lineageDetailsKeyBrand: ModelName +} + export type ModelColumn = Column & { id: ModelColumnID name: ColumnName - columnLineageData?: ColumnLevelLineageAdjacencyList + columnLineageData?: BrandedColumnLevelLineageAdjacencyList } export type NodeType = 'sql' | 'python' export type ModelLineageNodeDetails = { - name: ModelName + name: string display_name: string identifier: string version: string @@ -35,12 +62,12 @@ export type ModelLineageNodeDetails = { kind?: string model_type?: string tags?: string[] - columns?: Record + columns?: BrandedRecord } export type NodeData = { name: ModelName - displayName: string + displayName: ModelDisplayName model_type: NodeType identifier: string version: string @@ -48,8 +75,8 @@ export type 
NodeData = { cron: string owner: string dialect: string - columns?: Record tags: string[] + columns?: BrandedRecord } export type EdgeData = { @@ -62,14 +89,18 @@ export type EdgeData = { export type ModelLineageContextValue = ColumnLevelLineageContextValue< ModelName, ColumnName, - ModelColumnID + ModelColumnID, + BrandedColumnLevelLineageAdjacencyList > & LineageContextValue< NodeData, EdgeData, ModelNodeId, ModelEdgeId, - ModelColumnID + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId > export const initial = { @@ -77,7 +108,8 @@ export const initial = { ...getColumnLevelLineageContextInitial< ModelName, ColumnName, - ModelColumnID + ModelColumnID, + BrandedColumnLevelLineageAdjacencyList >(), } @@ -86,7 +118,10 @@ export const { Provider, useLineage } = createLineageContext< EdgeData, ModelNodeId, ModelEdgeId, - ModelColumnID, + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId, ModelLineageContextValue >(initial) diff --git a/web/common/src/components/Lineage/stories/ModelNodeColumn.tsx b/web/common/src/components/Lineage/stories/ModelNodeColumn.tsx index 35d4a0e592..dbb3f92dad 100644 --- a/web/common/src/components/Lineage/stories/ModelNodeColumn.tsx +++ b/web/common/src/components/Lineage/stories/ModelNodeColumn.tsx @@ -1,6 +1,5 @@ import React from 'react' -import { type ColumnLevelLineageAdjacencyList } from '../LineageColumnLevel/ColumnLevelLineageContext' import { FactoryColumn } from '../LineageColumnLevel/FactoryColumn' import { @@ -9,13 +8,19 @@ import { type ModelName, type ModelNodeId, type ColumnName, + type BrandedColumnLevelLineageAdjacencyList, + type LeftPortHandleId, + type RightPortHandleId, } from './ModelLineageContext' const ModelColumn = FactoryColumn< ModelName, ColumnName, ModelNodeId, - ModelColumnID + ModelColumnID, + LeftPortHandleId, + RightPortHandleId, + BrandedColumnLevelLineageAdjacencyList >(useModelLineage) export const ModelNodeColumn = React.memo(function ModelNodeColumn({ @@ -35,7 +40,7 @@ export const ModelNodeColumn = React.memo(function ModelNodeColumn({ type: string description?: string | null className?: string - columnLineageData?: ColumnLevelLineageAdjacencyList + columnLineageData?: BrandedColumnLevelLineageAdjacencyList }) { const { selectedColumns, setColumnLevelLineage } = useModelLineage() diff --git a/web/common/src/components/Lineage/utils.ts b/web/common/src/components/Lineage/utils.ts index 01a277f17a..4e2d55a5a0 100644 --- a/web/common/src/components/Lineage/utils.ts +++ b/web/common/src/components/Lineage/utils.ts @@ -4,6 +4,8 @@ import { type Edge, type Node } from '@xyflow/react' export type NodeId = Branded export type EdgeId = Branded export type PortId = Branded +export type HandleId = Branded +export type PortHandleId = Branded export type LineageNodeData = Record export type LineageEdgeData = Record @@ -29,25 +31,36 @@ export interface LineageNode< export interface LineageEdge< TEdgeData extends LineageEdgeData, - TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, > extends Edge { id: TEdgeID - source: TNodeID - target: TNodeID - sourceHandle?: TPortID - targetHandle?: TPortID + source: TSourceID + target: TTargetID + sourceHandle?: TSourceHandleID + targetHandle?: TTargetHandleID } export type LayoutedGraph< TNodeData extends LineageNodeData = LineageNodeData, TEdgeData 
extends LineageEdgeData = LineageEdgeData, - TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, > = { - edges: LineageEdge[] + edges: LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[] nodesMap: LineageNodesMap } @@ -60,17 +73,26 @@ export type TransformNodeFn< export type TransformEdgeFn< TEdgeData extends LineageEdgeData = LineageEdgeData, - TNodeID extends string = NodeId, TEdgeID extends string = EdgeId, - TPortID extends string = PortId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, > = ( edgeType: string, edgeId: TEdgeID, - sourceId: TNodeID, - targetId: TNodeID, - sourceColumnId?: TPortID, - targetColumnId?: TPortID, -) => LineageEdge + sourceId: TSourceID, + targetId: TTargetID, + sourceHandleId?: TSourceHandleID, + targetHandleId?: TTargetHandleID, +) => LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID +> export const DEFAULT_NODE_HEIGHT = 32 export const DEFAULT_NODE_WIDTH = 300 diff --git a/web/common/src/index.ts b/web/common/src/index.ts index 0748a6c78e..c3c65a8e77 100644 --- a/web/common/src/index.ts +++ b/web/common/src/index.ts @@ -66,6 +66,8 @@ export { cn, truncate } from '@/utils' export type { Brand, Branded, + BrandedString, + BrandedRecord, Size, HeadlineLevel, Side, diff --git a/web/common/src/types.ts b/web/common/src/types.ts index 3de26b205d..e8bdf3e9de 100644 --- a/web/common/src/types.ts +++ b/web/common/src/types.ts @@ -1,8 +1,46 @@ export declare const __brand: unique symbol - export type Brand = { [__brand]: B } + +/** + * Branded is a type that adds a brand to a type. It is a type that is used to + * ensure that the type is unique and that it is not possible to mix up types + * with the same brand. + * + * @example + * + * type UserId = Branded + * type UserName = Branded + * + * const userId = '123' as UserId + * const userName = 'John Doe' as UserName + * + * userId == userName -> compile error + */ export type Branded = T & Brand +/** + * Constraint that only accepts branded string types + */ +export type BrandedString = string & Brand + +/** + * BrandedRecord is a type that creates a branded Record type with strict key checking. + * This ensures that Record is NOT assignable to Record + * + * @example + * type ModelFQN = Branded + * type ModelName = Branded + * + * type FQNMap = BrandedRecord + * type NameMap = BrandedRecord + * + * const fqnMap: FQNMap = {} + * const nameMap: NameMap = fqnMap // TypeScript error! 
+ */
+export type BrandedRecord = Record & {
+  readonly __recordKeyBrand: K
+}
+
 export type Callback = (data?: T) => void
 
 export type Size = '2xs' | 'xs' | 's' | 'm' | 'l' | 'xl' | '2xl'

From 633e64e3d3479a01dc3f140b808497e88416d22f Mon Sep 17 00:00:00 2001
From: Giorgos Michas
Date: Thu, 9 Oct 2025 15:59:24 +0300
Subject: [PATCH 073/173] fix: robust cluster_by config parsing (#5478)

---
 sqlmesh/dbt/model.py             |  8 ++++-
 tests/dbt/test_transformation.py | 54 ++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/sqlmesh/dbt/model.py b/sqlmesh/dbt/model.py
index d882f94942..fa84824a43 100644
--- a/sqlmesh/dbt/model.py
+++ b/sqlmesh/dbt/model.py
@@ -601,7 +601,13 @@ def to_sqlmesh(
             clustered_by = []
             for c in self.cluster_by:
                 try:
-                    clustered_by.append(d.parse_one(c, dialect=model_dialect))
+                    cluster_expr = exp.maybe_parse(
+                        c, into=exp.Cluster, prefix="CLUSTER BY", dialect=model_dialect
+                    )
+                    for expr in cluster_expr.expressions:
+                        clustered_by.append(
+                            expr.this if isinstance(expr, exp.Ordered) else expr
+                        )
                 except SqlglotError as e:
                     raise ConfigError(
                         f"Failed to parse model '{self.canonical_name(context)}' cluster_by field '{c}' in '{self.path}': {e}"
diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py
index 304ac57731..97c5c37e75 100644
--- a/tests/dbt/test_transformation.py
+++ b/tests/dbt/test_transformation.py
@@ -2306,6 +2306,60 @@ def test_model_cluster_by():
     )
     assert model.to_sqlmesh(context).clustered_by == []
 
+    model = ModelConfig(
+        name="model",
+        alias="model",
+        package_name="package",
+        target_schema="test",
+        cluster_by="Bar, qux",
+        sql="SELECT * FROM baz",
+        materialized=Materialization.TABLE.value,
+    )
+    assert model.to_sqlmesh(context).clustered_by == [
+        exp.to_column('"BAR"'),
+        exp.to_column('"QUX"'),
+    ]
+
+    model = ModelConfig(
+        name="model",
+        alias="model",
+        package_name="package",
+        target_schema="test",
+        cluster_by=['"Bar,qux"'],
+        sql="SELECT * FROM baz",
+        materialized=Materialization.TABLE.value,
+    )
+    assert model.to_sqlmesh(context).clustered_by == [
+        exp.to_column('"Bar,qux"'),
+    ]
+
+    model = ModelConfig(
+        name="model",
+        alias="model",
+        package_name="package",
+        target_schema="test",
+        cluster_by='"Bar,qux"',
+        sql="SELECT * FROM baz",
+        materialized=Materialization.TABLE.value,
+    )
+    assert model.to_sqlmesh(context).clustered_by == [
+        exp.to_column('"Bar,qux"'),
+    ]
+
+    model = ModelConfig(
+        name="model",
+        alias="model",
+        package_name="package",
+        target_schema="test",
+        cluster_by=["to_date(Bar),qux"],
+        sql="SELECT * FROM baz",
+        materialized=Materialization.TABLE.value,
+    )
+    assert model.to_sqlmesh(context).clustered_by == [
+        exp.TsOrDsToDate(this=exp.to_column('"BAR"')),
+        exp.to_column('"QUX"'),
+    ]
+
 
 def test_snowflake_dynamic_table():
     context = DbtContext()

From 33ec6d934ea034a1273dd4313e691049e66b22a3 Mon Sep 17 00:00:00 2001
From: Jo <46752250+georgesittas@users.noreply.github.com>
Date: Thu, 9 Oct 2025 16:18:11 +0300
Subject: [PATCH 074/173] Chore: improve unit test validation (#5517)

---
 sqlmesh/core/test/definition.py |  3 +++
 tests/core/test_test.py         | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/sqlmesh/core/test/definition.py b/sqlmesh/core/test/definition.py
index 4336e00ce0..8d8ca17702 100644
--- a/sqlmesh/core/test/definition.py
+++ b/sqlmesh/core/test/definition.py
@@ -454,6 +454,9 @@ def _validate_and_normalize_test(self) -> None:
         query = outputs.get("query")
         partial = outputs.pop("partial", None)
 
+        if ctes is None and query is None:
+            _raise_error("Incomplete test, outputs must contain 'query' or 'ctes'", self.path)
+
         def _normalize_rows(
             values: t.List[Row] | t.Dict,
             name: str,
diff --git a/tests/core/test_test.py b/tests/core/test_test.py
index 56a44cc955..13e31703a1 100644
--- a/tests/core/test_test.py
+++ b/tests/core/test_test.py
@@ -1185,6 +1185,27 @@ def test_unknown_column_error() -> None:
     )
 
 
+def test_invalid_outputs_error() -> None:
+    with pytest.raises(TestError, match="Incomplete test, outputs must contain 'query' or 'ctes'"):
+        _create_test(
+            body=load_yaml(
+                """
+test_foo:
+  model: sushi.foo
+  inputs:
+    raw:
+      - id: 1
+  outputs:
+    rows:
+      - id: 1
+                """
+            ),
+            test_name="test_foo",
+            model=_create_model("SELECT id FROM raw"),
+            context=Context(config=Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))),
+        )
+
+
 def test_empty_rows(sushi_context: Context) -> None:
     _check_successful_or_raise(
         _create_test(

From 17a73f0f471ce12162097ed484083c435245386d Mon Sep 17 00:00:00 2001
From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com>
Date: Thu, 9 Oct 2025 19:49:47 +0300
Subject: [PATCH 075/173] Chore: Add tag assertion in selectors tag test (#5518)

---
 tests/dbt/cli/test_selectors.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/dbt/cli/test_selectors.py b/tests/dbt/cli/test_selectors.py
index 3d50fe6ed2..a2c19057eb 100644
--- a/tests/dbt/cli/test_selectors.py
+++ b/tests/dbt/cli/test_selectors.py
@@ -215,6 +215,7 @@ def test_exclude_by_dbt_names(
     ctx = jaffle_shop_duckdb_context
     ctx.load()
     assert '"jaffle_shop"."main"."agg_orders"' in ctx.models
+    assert ctx.get_model('"jaffle_shop"."main"."agg_orders"').tags == ["agg"]
 
     selector = ctx._new_selector()
     assert isinstance(selector, DbtSelector)

From fd54170c2a6ad2c0ab7d70d5b8c015d50c3a3b88 Mon Sep 17 00:00:00 2001
From: Ben <9087625+benfdking@users.noreply.github.com>
Date: Thu, 9 Oct 2025 19:28:09 +0100
Subject: [PATCH 076/173] chore: small dbt test addition (#5519)

---
 tests/dbt/cli/test_selectors.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/dbt/cli/test_selectors.py b/tests/dbt/cli/test_selectors.py
index a2c19057eb..17f0195f58 100644
--- a/tests/dbt/cli/test_selectors.py
+++ b/tests/dbt/cli/test_selectors.py
@@ -165,6 +165,7 @@ def test_select_by_dbt_names(
     ctx = jaffle_shop_duckdb_context
     ctx.load()
     assert '"jaffle_shop"."main"."agg_orders"' in ctx.models
+    assert ctx.get_model('"jaffle_shop"."main"."agg_orders"').tags == ["agg"]
 
     selector = ctx._new_selector()
     assert isinstance(selector, DbtSelector)

From 84da6ac33b1632a9719eca062de07fa4480f28b4 Mon Sep 17 00:00:00 2001
From: Erin Drummond
Date: Fri, 10 Oct 2025 08:06:26 +1300
Subject: [PATCH 077/173] Feat(sqlmesh_dbt): Add support for --log-level (#5514)

---
 sqlmesh/__init__.py                | 12 ++++++++++--
 sqlmesh_dbt/cli.py                 | 14 +++++++++++++-
 sqlmesh_dbt/operations.py          |  3 ++-
 tests/dbt/cli/test_global_flags.py | 14 ++++++++++++++
 tests/dbt/cli/test_operations.py   | 11 +++++++++++
 5 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/sqlmesh/__init__.py b/sqlmesh/__init__.py
index 7712a41379..577a3aaf02 100644
--- a/sqlmesh/__init__.py
+++ b/sqlmesh/__init__.py
@@ -188,6 +188,7 @@ def configure_logging(
     write_to_file: bool = True,
     log_file_dir: t.Optional[t.Union[str, Path]] = None,
    ignore_warnings: bool = False,
+    log_level: t.Optional[t.Union[str, int]] = None,
 ) -> None:
     # Remove noisy grpc logs that are not useful for users
     os.environ["GRPC_VERBOSITY"] = os.environ.get("GRPC_VERBOSITY", "NONE")
@@ -195,8 +196,15 @@ def configure_logging(
     logger = logging.getLogger()
     debug = force_debug or debug_mode_enabled()
 
-    # base logger needs to be the lowest level that we plan to log
-    level = logging.DEBUG if debug else logging.INFO
+    if log_level is not None:
+        if isinstance(log_level, str):
+            level = logging._nameToLevel.get(log_level.upper()) or logging.INFO
+        else:
+            level = log_level
+    else:
+        # base logger needs to be the lowest level that we plan to log
+        level = logging.DEBUG if debug else logging.INFO
+
     logger.setLevel(level)
 
     if debug:
diff --git a/sqlmesh_dbt/cli.py b/sqlmesh_dbt/cli.py
index ec11e7730e..981384fa64 100644
--- a/sqlmesh_dbt/cli.py
+++ b/sqlmesh_dbt/cli.py
@@ -78,6 +78,12 @@ def _cleanup() -> None:
     default=False,
     help="Display debug logging during dbt execution. Useful for debugging and making bug reports events to help when debugging.",
 )
+@click.option(
+    "--log-level",
+    default="info",
+    type=click.Choice(["debug", "info", "warn", "error", "none"]),
+    help="Specify the minimum severity of events that are logged to the console and the log file.",
+)
 @click.pass_context
 @cli_global_error_handler
 def dbt(
@@ -85,6 +91,7 @@ def dbt(
     profile: t.Optional[str] = None,
     target: t.Optional[str] = None,
     debug: bool = False,
+    log_level: t.Optional[str] = None,
 ) -> None:
     """
     An ELT tool for managing your SQL transformations and data models, powered by the SQLMesh engine.
@@ -97,7 +104,12 @@ def dbt(
     # we have a partially applied function here because subcommands might set extra options like --vars
     # that need to be known before we attempt to load the project
     ctx.obj = functools.partial(
-        create, project_dir=Path.cwd(), profile=profile, target=target, debug=debug
+        create,
+        project_dir=Path.cwd(),
+        profile=profile,
+        target=target,
+        debug=debug,
+        log_level=log_level,
     )
 
     if not ctx.invoked_subcommand:
diff --git a/sqlmesh_dbt/operations.py b/sqlmesh_dbt/operations.py
index cb1ac217cc..810046dead 100644
--- a/sqlmesh_dbt/operations.py
+++ b/sqlmesh_dbt/operations.py
@@ -237,6 +237,7 @@ def create(
     vars: t.Optional[t.Dict[str, t.Any]] = None,
     threads: t.Optional[int] = None,
     debug: bool = False,
+    log_level: t.Optional[str] = None,
 ) -> DbtOperations:
     with Progress(transient=True) as progress:
         # Indeterminate progress bar before SQLMesh import to provide feedback to the user that something is indeed happening
@@ -256,7 +257,7 @@ def create(
         while root_logger.hasHandlers():
             root_logger.removeHandler(root_logger.handlers[0])
 
-        configure_logging(force_debug=debug)
+        configure_logging(force_debug=debug, log_level=log_level)
         set_console(DbtCliConsole())
 
         progress.update(load_task_id, description="Loading project", total=None)
diff --git a/tests/dbt/cli/test_global_flags.py b/tests/dbt/cli/test_global_flags.py
index 66dee7236c..abdb1ac41b 100644
--- a/tests/dbt/cli/test_global_flags.py
+++ b/tests/dbt/cli/test_global_flags.py
@@ -1,10 +1,12 @@
 import typing as t
 from pathlib import Path
 import pytest
+import logging
 from pytest_mock import MockerFixture
 from click.testing import Result
 from sqlmesh.utils.errors import SQLMeshError
 from sqlglot.errors import SqlglotError
+from tests.dbt.conftest import EmptyProjectCreator
 
 pytestmark = pytest.mark.slow
 
@@ -93,3 +95,15 @@ def test_run_error_handler(
     assert result.exit_code == 1
     assert "Error: Error with selector" in result.output
     assert "Traceback" not in result.output
+
+
+def test_log_level(invoke_cli: t.Callable[..., Result], create_empty_project: EmptyProjectCreator):
+    create_empty_project()
+
+    result = invoke_cli(["--log-level", "info", "list"])
+    assert result.exit_code == 0
+    assert logging.getLogger("sqlmesh").getEffectiveLevel() == logging.INFO
+
+    result = invoke_cli(["--log-level", "debug", "list"])
+    assert result.exit_code == 0
+    assert logging.getLogger("sqlmesh").getEffectiveLevel() == logging.DEBUG
diff --git a/tests/dbt/cli/test_operations.py b/tests/dbt/cli/test_operations.py
index 139336297c..4aa508e21f 100644
--- a/tests/dbt/cli/test_operations.py
+++ b/tests/dbt/cli/test_operations.py
@@ -9,6 +9,7 @@
 from sqlmesh.core.plan import PlanBuilder
 from sqlmesh.core.config.common import VirtualEnvironmentMode
 from tests.dbt.conftest import EmptyProjectCreator
+import logging
 
 pytestmark = pytest.mark.slow
 
@@ -363,3 +364,13 @@ def test_create_sets_concurrent_tasks_based_on_threads(create_empty_project: Emp
         g.connection and g.connection.concurrent_tasks == 16
         for g in operations.context.config.gateways.values()
     )
+
+
+def test_create_configures_log_level(create_empty_project: EmptyProjectCreator):
+    project_dir, _ = create_empty_project()
+
+    create(project_dir=project_dir, log_level="info")
+    assert logging.getLogger("sqlmesh").getEffectiveLevel() == logging.INFO
+
+    create(project_dir=project_dir, log_level="error")
+    assert logging.getLogger("sqlmesh").getEffectiveLevel() == logging.ERROR

From 5435ff820c3a76fb200e4ee129ef43d2abbf6940 Mon Sep 17 00:00:00 2001
From: Erin Drummond
Date: Fri, 10 Oct 2025 08:06:52 +1300
Subject: [PATCH 078/173] Fix(windows): Allow 'sqlmesh clean' to delete cache file paths that exceed 260 chars (#5512)

---
 sqlmesh/core/context.py     | 16 +++++++++------
 sqlmesh/utils/windows.py    | 16 ++++++++++++---
 tests/core/test_context.py  | 40 +++++++++++++++++++++++++++++++++++++
 tests/utils/test_windows.py | 39 ++++++++++++++++++++++++++++++++++++
 4 files changed, 102 insertions(+), 9 deletions(-)
 create mode 100644 tests/utils/test_windows.py

diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py
index d118116f7f..f9d54b0564 100644
--- a/sqlmesh/core/context.py
+++ b/sqlmesh/core/context.py
@@ -139,6 +139,7 @@
 )
 from sqlmesh.utils.config import print_config
 from sqlmesh.utils.jinja import JinjaMacroRegistry
+from sqlmesh.utils.windows import IS_WINDOWS, fix_windows_path
 
 if t.TYPE_CHECKING:
     import pandas as pd
@@ -2590,12 +2591,15 @@ def table_name(
         )
 
     def clear_caches(self) -> None:
-        for path in self.configs:
-            cache_path = path / c.CACHE
-            if cache_path.exists():
-                rmtree(cache_path)
-        if self.cache_dir.exists():
-            rmtree(self.cache_dir)
+        paths_to_remove = [path / c.CACHE for path in self.configs]
+        paths_to_remove.append(self.cache_dir)
+
+        if IS_WINDOWS:
+            paths_to_remove = [fix_windows_path(path) for path in paths_to_remove]
+
+        for path in paths_to_remove:
+            if path.exists():
+                rmtree(path)
 
         if isinstance(self._state_sync, CachingStateSync):
             self._state_sync.clear_cache()
diff --git a/sqlmesh/utils/windows.py b/sqlmesh/utils/windows.py
index 238ed353de..b2de5b8af9 100644
--- a/sqlmesh/utils/windows.py
+++ b/sqlmesh/utils/windows.py
@@ -3,12 +3,22 @@
 
 IS_WINDOWS = platform.system() == "Windows"
 
+WINDOWS_LONGPATH_PREFIX = "\\\\?\\"
+
 
 def fix_windows_path(path: Path) -> Path:
     """
     Windows paths are limited to 260 characters: https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation
     Users can change this by updating a registry entry but we cant rely on that.
-    We can quite commonly generate a cache file path that exceeds 260 characters which causes a FileNotFound error.
-    If we prefix the path with "\\?\" then we can have paths up to 32,767 characters
+
+    SQLMesh quite commonly generates cache file paths that exceed 260 characters and thus cause a FileNotFound error.
+    If we prefix paths with "\\?\" then we can have paths up to 32,767 characters.
+
+    Note that this prefix also means that relative paths no longer work. From the above docs:
+    > Because you cannot use the "\\?\" prefix with a relative path, relative paths are always limited to a total of MAX_PATH characters.
+
+    So we also call path.resolve() to resolve the relative sections so that operations like `path.read_text()` continue to work
     """
-    return Path("\\\\?\\" + str(path.absolute()))
+    if path.parts and not path.parts[0].startswith(WINDOWS_LONGPATH_PREFIX):
+        path = Path(WINDOWS_LONGPATH_PREFIX + str(path.absolute()))
+    return path.resolve()
diff --git a/tests/core/test_context.py b/tests/core/test_context.py
index 60ea3fd451..54b8cd891a 100644
--- a/tests/core/test_context.py
+++ b/tests/core/test_context.py
@@ -62,6 +62,7 @@
     NoChangesPlanError,
 )
 from sqlmesh.utils.metaprogramming import Executable
+from sqlmesh.utils.windows import IS_WINDOWS, fix_windows_path
 from tests.utils.test_helpers import use_terminal_console
 from tests.utils.test_filesystem import create_temp_file
 
@@ -700,6 +701,45 @@ def test_clear_caches(tmp_path: pathlib.Path):
     assert not cache_dir.exists()
 
 
+def test_clear_caches_with_long_base_path(tmp_path: pathlib.Path):
+    base_path = tmp_path / ("abcde" * 50)
+    assert (
+        len(str(base_path.absolute())) > 260
+    )  # Paths longer than 260 chars trigger problems on Windows
+
+    default_cache_dir = base_path / c.CACHE
+    custom_cache_dir = base_path / ".test_cache"
+
+    # note: we create the Context here so it doesnt get passed any "fixed" paths
+    ctx = Context(config=Config(cache_dir=str(custom_cache_dir)), paths=base_path)
+
+    if IS_WINDOWS:
+        # fix these so we can use them in this test
+        default_cache_dir = fix_windows_path(default_cache_dir)
+        custom_cache_dir = fix_windows_path(custom_cache_dir)
+
+    default_cache_dir.mkdir(parents=True)
+    custom_cache_dir.mkdir(parents=True)
+
+    default_cache_file = default_cache_dir / "cache.txt"
+    custom_cache_file = custom_cache_dir / "cache.txt"
+
+    default_cache_file.write_text("test")
+    custom_cache_file.write_text("test")
+
+    assert default_cache_file.exists()
+    assert custom_cache_file.exists()
+    assert default_cache_dir.exists()
+    assert custom_cache_dir.exists()
+
+    ctx.clear_caches()
+
+    assert not default_cache_file.exists()
+    assert not custom_cache_file.exists()
+    assert not default_cache_dir.exists()
+    assert not custom_cache_dir.exists()
+
+
 def test_cache_path_configurations(tmp_path: pathlib.Path):
    project_dir = tmp_path / "project"
    project_dir.mkdir(parents=True)
diff --git a/tests/utils/test_windows.py b/tests/utils/test_windows.py
new file mode 100644
index 0000000000..196589d9c2
--- /dev/null
+++ b/tests/utils/test_windows.py
@@ -0,0 +1,39 @@
+import pytest
+from pathlib import Path
+from sqlmesh.utils.windows import IS_WINDOWS, WINDOWS_LONGPATH_PREFIX, fix_windows_path
+
+
+@pytest.mark.skipif(
+    not IS_WINDOWS, reason="pathlib.Path only produces WindowsPath objects on Windows"
+)
+def test_fix_windows_path():
+    short_path = Path("c:\\foo")
+    short_path_prefixed = Path(WINDOWS_LONGPATH_PREFIX + "c:\\foo")
+
+    segments = "\\".join(["bar", "baz", "bing"] * 50)
+    long_path = Path("c:\\" + segments)
+    long_path_prefixed = Path(WINDOWS_LONGPATH_PREFIX + "c:\\" + segments)
+
+    assert len(str(short_path.absolute)) < 260
+
assert len(str(long_path.absolute)) > 260 + + # paths less than 260 chars are still prefixed because they may be being used as a base path + assert fix_windows_path(short_path) == short_path_prefixed + + # paths greater than 260 characters don't work at all without the prefix + assert fix_windows_path(long_path) == long_path_prefixed + + # multiple calls dont keep appending the same prefix + assert ( + fix_windows_path(fix_windows_path(fix_windows_path(long_path_prefixed))) + == long_path_prefixed + ) + + # paths with relative sections need to have relative sections resolved before they can be used + # since the \\?\ prefix doesnt work for paths with relative sections + assert fix_windows_path(Path("c:\\foo\\..\\bar")) == Path(WINDOWS_LONGPATH_PREFIX + "c:\\bar") + + # also check that relative sections are still resolved if they are added to a previously prefixed path + base = fix_windows_path(Path("c:\\foo")) + assert base == Path(WINDOWS_LONGPATH_PREFIX + "c:\\foo") + assert fix_windows_path(base / ".." / "bar") == Path(WINDOWS_LONGPATH_PREFIX + "c:\\bar") From 725ebccd116df1f94da5e3a875cd021e204a20d7 Mon Sep 17 00:00:00 2001 From: Max Mykal Date: Thu, 9 Oct 2025 17:26:04 -0700 Subject: [PATCH 079/173] feat(web_common): make simpler including common package directly within repo (#5523) --- web/common/src/components/Badge/Badge.css | 2 +- .../src/components/Badge/Badge.stories.tsx | 2 +- web/common/src/components/Badge/Badge.tsx | 4 +- web/common/src/components/Button/Button.css | 2 +- .../src/components/Button/Button.stories.tsx | 2 +- web/common/src/components/Button/Button.tsx | 4 +- .../src/components/CopyButton/CopyButton.tsx | 7 +- .../HorizontalContainer.tsx | 2 +- web/common/src/components/Input/Input.css | 2 +- web/common/src/components/Input/Input.tsx | 4 +- .../LineageColumnLevel/FactoryColumn.css | 2 +- .../LineageColumnLevel/FactoryColumn.tsx | 12 +- .../src/components/Lineage/LineageContext.ts | 10 +- .../Lineage/LineageControlButton.tsx | 2 +- .../components/Lineage/LineageControlIcon.tsx | 2 +- .../src/components/Lineage/LineageLayout.tsx | 9 + .../components/Lineage/LineageLayoutBase.tsx | 173 ++++++++---------- .../Lineage/LineageLayoutContainer.tsx | 2 +- .../components/Lineage/layout/dagreLayout.ts | 11 +- .../components/Lineage/node/NodeAppendix.tsx | 2 +- .../src/components/Lineage/node/NodeBadge.tsx | 4 +- .../src/components/Lineage/node/NodeBase.tsx | 4 +- .../components/Lineage/node/NodeContainer.tsx | 4 +- .../components/Lineage/node/NodeDetail.tsx | 2 +- .../components/Lineage/node/NodeHandle.tsx | 2 +- .../Lineage/node/NodeHandleIcon.tsx | 2 +- .../components/Lineage/node/NodeHandles.tsx | 4 +- .../components/Lineage/node/NodeHeader.tsx | 2 +- .../src/components/Lineage/node/NodePort.tsx | 12 +- .../src/components/Lineage/node/NodePorts.tsx | 6 +- .../components/Lineage/node/base-handle.tsx | 2 +- .../src/components/Lineage/node/base-node.tsx | 2 +- .../Lineage/stories/Lineage.stories.tsx | 2 +- .../Lineage/stories/ModelLineage.tsx | 21 ++- .../Lineage/stories/ModelLineageContext.ts | 2 +- .../components/Lineage/stories/ModelNode.tsx | 14 +- .../Lineage/stories/dagreLayout.worker.ts | 4 +- web/common/src/components/Lineage/utils.ts | 2 +- .../LoadingContainer.stories.tsx | 2 +- .../LoadingContainer/LoadingContainer.tsx | 4 +- .../LoadingContainer/LoadingIcon.tsx | 2 +- .../MessageContainer/MessageContainer.css | 2 +- .../MessageContainer/MessageContainer.tsx | 2 +- .../src/components/Metadata/Metadata.css | 2 +- .../src/components/Metadata/Metadata.tsx | 2 +- 
From 725ebccd116df1f94da5e3a875cd021e204a20d7 Mon Sep 17 00:00:00 2001
From: Max Mykal
Date: Thu, 9 Oct 2025 17:26:04 -0700
Subject: [PATCH 079/173] feat(web_common): make simpler including common
 package directly within repo (#5523)

---
 web/common/src/components/Badge/Badge.css | 2 +-
 .../src/components/Badge/Badge.stories.tsx | 2 +-
 web/common/src/components/Badge/Badge.tsx | 4 +-
 web/common/src/components/Button/Button.css | 2 +-
 .../src/components/Button/Button.stories.tsx | 2 +-
 web/common/src/components/Button/Button.tsx | 4 +-
 .../src/components/CopyButton/CopyButton.tsx | 7 +-
 .../HorizontalContainer.tsx | 2 +-
 web/common/src/components/Input/Input.css | 2 +-
 web/common/src/components/Input/Input.tsx | 4 +-
 .../LineageColumnLevel/FactoryColumn.css | 2 +-
 .../LineageColumnLevel/FactoryColumn.tsx | 12 +-
 .../src/components/Lineage/LineageContext.ts | 10 +-
 .../Lineage/LineageControlButton.tsx | 2 +-
 .../components/Lineage/LineageControlIcon.tsx | 2 +-
 .../src/components/Lineage/LineageLayout.tsx | 9 +
 .../components/Lineage/LineageLayoutBase.tsx | 173 ++++++++----------
 .../Lineage/LineageLayoutContainer.tsx | 2 +-
 .../components/Lineage/layout/dagreLayout.ts | 11 +-
 .../components/Lineage/node/NodeAppendix.tsx | 2 +-
 .../src/components/Lineage/node/NodeBadge.tsx | 4 +-
 .../src/components/Lineage/node/NodeBase.tsx | 4 +-
 .../components/Lineage/node/NodeContainer.tsx | 4 +-
 .../components/Lineage/node/NodeDetail.tsx | 2 +-
 .../components/Lineage/node/NodeHandle.tsx | 2 +-
 .../Lineage/node/NodeHandleIcon.tsx | 2 +-
 .../components/Lineage/node/NodeHandles.tsx | 4 +-
 .../components/Lineage/node/NodeHeader.tsx | 2 +-
 .../src/components/Lineage/node/NodePort.tsx | 12 +-
 .../src/components/Lineage/node/NodePorts.tsx | 6 +-
 .../components/Lineage/node/base-handle.tsx | 2 +-
 .../src/components/Lineage/node/base-node.tsx | 2 +-
 .../Lineage/stories/Lineage.stories.tsx | 2 +-
 .../Lineage/stories/ModelLineage.tsx | 21 ++-
 .../Lineage/stories/ModelLineageContext.ts | 2 +-
 .../components/Lineage/stories/ModelNode.tsx | 14 +-
 .../Lineage/stories/dagreLayout.worker.ts | 4 +-
 web/common/src/components/Lineage/utils.ts | 2 +-
 .../LoadingContainer.stories.tsx | 2 +-
 .../LoadingContainer/LoadingContainer.tsx | 4 +-
 .../LoadingContainer/LoadingIcon.tsx | 2 +-
 .../MessageContainer/MessageContainer.css | 2 +-
 .../MessageContainer/MessageContainer.tsx | 2 +-
 .../src/components/Metadata/Metadata.css | 2 +-
 .../src/components/Metadata/Metadata.tsx | 2 +-
 .../src/components/ModelName/ModelName.css | 4 +-
 .../src/components/ModelName/ModelName.tsx | 4 +-
 .../ScrollContainer/ScrollContainer.css | 2 +-
 .../ScrollContainer/ScrollContainer.tsx | 4 +-
 web/common/src/components/Tooltip/Tooltip.css | 2 +-
 .../components/Tooltip/Tooltip.stories.tsx | 4 +-
 web/common/src/components/Tooltip/Tooltip.tsx | 2 +-
 .../src/components/Typography/Description.tsx | 2 +-
 .../src/components/Typography/Headline.tsx | 4 +-
 .../src/components/Typography/Information.tsx | 4 +-
 .../src/components/Typography/Tagline.tsx | 2 +-
 web/common/src/components/Typography/Text.tsx | 2 +-
 web/common/src/components/Typography/help.ts | 2 +-
 .../VerticalContainer/VerticalContainer.tsx | 2 +-
 .../components/VirtualList/FilterableList.css | 2 +-
 .../components/VirtualList/FilterableList.tsx | 2 +-
 .../components/VirtualList/VirtualList.tsx | 2 +-
 web/common/src/index.ts | 50 ++---
 web/common/src/styles/design/index.css | 1 -
 web/common/tsconfig.base.json | 2 +-
 web/common/vite.config.js | 4 +-
 66 files changed, 238 insertions(+), 228 deletions(-)

diff --git a/web/common/src/components/Badge/Badge.css b/web/common/src/components/Badge/Badge.css
index 582a1264fb..0efef35e41 100644
--- a/web/common/src/components/Badge/Badge.css
+++ b/web/common/src/components/Badge/Badge.css
@@ -1,4 +1,4 @@
-:root {
+:where(:root) {
   --color-badge-background: var(--color-neutral-100);
   --color-badge-foreground: var(--color-prose);
 }
diff --git a/web/common/src/components/Badge/Badge.stories.tsx b/web/common/src/components/Badge/Badge.stories.tsx
index 09754d29a8..143440037e 100644
--- a/web/common/src/components/Badge/Badge.stories.tsx
+++ b/web/common/src/components/Badge/Badge.stories.tsx
@@ -1,6 +1,6 @@
 import type { Meta, StoryObj } from '@storybook/react-vite'
 
-import type { Shape, Size } from '@/types'
+import type { Shape, Size } from '@sqlmesh-common/types'
 import { Badge } from './Badge'
 
 const meta: Meta = {
diff --git a/web/common/src/components/Badge/Badge.tsx b/web/common/src/components/Badge/Badge.tsx
index cd6df21c26..2bc23940ad 100644
--- a/web/common/src/components/Badge/Badge.tsx
+++ b/web/common/src/components/Badge/Badge.tsx
@@ -1,8 +1,8 @@
 import { Slot } from '@radix-ui/react-slot'
 import React from 'react'
 
-import type { Shape, Size } from '@/types'
-import { cn } from '@/utils'
+import type { Shape, Size } from '@sqlmesh-common/types'
+import { cn } from '@sqlmesh-common/utils'
 import { cva } from 'class-variance-authority'
 
 import './Badge.css'
diff --git a/web/common/src/components/Button/Button.css b/web/common/src/components/Button/Button.css
index 7e8b856bf3..a95397dabb 100644
--- a/web/common/src/components/Button/Button.css
+++ b/web/common/src/components/Button/Button.css
@@ -1,4 +1,4 @@
-:root {
+:where(:root) {
   --color-button-primary-background: var(--color-action);
   --color-button-primary-foreground: var(--color-light);
   --color-button-primary-hover: var(--color-action-hover);
diff --git a/web/common/src/components/Button/Button.stories.tsx b/web/common/src/components/Button/Button.stories.tsx
index 57fb9f26e2..8836a35a5c 100644
--- a/web/common/src/components/Button/Button.stories.tsx
+++ b/web/common/src/components/Button/Button.stories.tsx
@@ -1,5 +1,5 @@
 import type { Meta, StoryObj } from '@storybook/react-vite'
-import type { Size } from '@/types'
+import type { Size } from '@sqlmesh-common/types'
 import { Button, type ButtonVariant } from './Button'
 import { fn, expect, userEvent, within } from 'storybook/test'
diff --git a/web/common/src/components/Button/Button.tsx b/web/common/src/components/Button/Button.tsx
index cc34ce192a..fd9baebdf2 100644
--- a/web/common/src/components/Button/Button.tsx
+++ b/web/common/src/components/Button/Button.tsx
@@ -2,8 +2,8 @@ import React from 'react'
 import { Slot } from '@radix-ui/react-slot'
 import { cva } from 'class-variance-authority'
 
-import { cn } from '@/utils'
-import type { Shape, Size } from '@/types'
+import { cn } from '@sqlmesh-common/utils'
+import type { Shape, Size } from '@sqlmesh-common/types'
 
 import './Button.css'
diff --git a/web/common/src/components/CopyButton/CopyButton.tsx b/web/common/src/components/CopyButton/CopyButton.tsx
index 3647121f82..1e5ba2580e 100644
--- a/web/common/src/components/CopyButton/CopyButton.tsx
+++ b/web/common/src/components/CopyButton/CopyButton.tsx
@@ -1,7 +1,10 @@
 import React from 'react'
 
-import { Button, type ButtonProps } from '@/components/Button/Button'
-import { useCopyClipboard } from '@/hooks/useCopyClipboard'
+import {
+  Button,
+  type ButtonProps,
+} from '@sqlmesh-common/components/Button/Button'
+import { useCopyClipboard } from '@sqlmesh-common/hooks/useCopyClipboard'
 
 export interface CopyButtonProps extends Omit {
   text: string
diff --git a/web/common/src/components/HorizontalContainer/HorizontalContainer.tsx b/web/common/src/components/HorizontalContainer/HorizontalContainer.tsx
index c1e2c66ed0..b92eaa418b 100644
--- a/web/common/src/components/HorizontalContainer/HorizontalContainer.tsx
+++ b/web/common/src/components/HorizontalContainer/HorizontalContainer.tsx
@@ -1,6 +1,6 @@
 import React from 'react'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 import { ScrollContainer } from '../ScrollContainer/ScrollContainer'
 
 export interface HorizontalContainerProps
diff --git a/web/common/src/components/Input/Input.css b/web/common/src/components/Input/Input.css
index 0baae3c6bb..2cb6ab9695 100644
--- a/web/common/src/components/Input/Input.css
+++ b/web/common/src/components/Input/Input.css
@@ -1,4 +1,4 @@
-:root {
+:where(:root) {
   --color-input-background: var(--color-light);
   --color-input-background-translucid: var(--color-neutral-5);
   --color-input-foreground: var(--color-prose);
diff --git a/web/common/src/components/Input/Input.tsx b/web/common/src/components/Input/Input.tsx
index 10ba151ab4..8d5c6fc7e4 100644
--- a/web/common/src/components/Input/Input.tsx
+++ b/web/common/src/components/Input/Input.tsx
@@ -1,6 +1,6 @@
 import * as React from 'react'
-import { cn } from '@/utils'
-import type { Size } from '@/types'
+import { cn } from '@sqlmesh-common/utils'
+import type { Size } from '@sqlmesh-common/types'
 import { cva } from 'class-variance-authority'
 
 import './Input.css'
diff --git a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.css b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.css
index d6eea6674a..8da848c684 100644
--- a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.css
+++ b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.css
@@ -1,4 +1,4 @@
-:root {
+:where(:root) {
   --color-lineage-model-column-badge-background: var(
     --color-lineage-node-badge-background
   );
diff --git a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx
index 350437c16e..294d3ca462 100644
--- a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx
+++ b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx
@@ -7,7 +7,7 @@
 } from 'lucide-react'
 import React from 'react'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 import { NodeBadge } from '../node/NodeBadge'
 import { NodePort } from '../node/NodePort'
 import { type NodeId, type PortHandleId, type PortId } from '../utils'
@@ -15,11 +15,11 @@
 import {
   type ColumnLevelLineageAdjacencyList,
   type ColumnLevelLineageContextHook,
 } from './ColumnLevelLineageContext'
-import { Tooltip } from '@/components/Tooltip/Tooltip'
-import { Metadata } from '@/components/Metadata/Metadata'
-import { HorizontalContainer } from '@/components/HorizontalContainer/HorizontalContainer'
-import { Information } from '@/components/Typography/Information'
-import { LoadingContainer } from '@/components/LoadingContainer/LoadingContainer'
+import { Tooltip } from '@sqlmesh-common/components/Tooltip/Tooltip'
+import { Metadata } from '@sqlmesh-common/components/Metadata/Metadata'
+import { HorizontalContainer } from '@sqlmesh-common/components/HorizontalContainer/HorizontalContainer'
+import { Information } from '@sqlmesh-common/components/Typography/Information'
+import { LoadingContainer } from '@sqlmesh-common/components/LoadingContainer/LoadingContainer'
 
 import './FactoryColumn.css'
diff --git a/web/common/src/components/Lineage/LineageContext.ts b/web/common/src/components/Lineage/LineageContext.ts
index 4a90031217..7c76c2cfd4 100644
--- a/web/common/src/components/Lineage/LineageContext.ts
+++ b/web/common/src/components/Lineage/LineageContext.ts
@@ -59,7 +59,11 @@
 >
   nodes: LineageNode[]
   nodesMap: LineageNodesMap
-  setNodesMap: React.Dispatch<React.SetStateAction<LineageNodesMap>>
+  setNodesMap: React.Dispatch<
+    React.SetStateAction<LineageNodesMap>
+  >
+  currentNodeId: TNodeID | null
+  selectedNode: LineageNode | null
   currentNode: LineageNode | null
 }
@@ -74,7 +78,6 @@ export function getInitial<
     setSelectedNodes: () => {},
     selectedEdges: new Set(),
     setSelectedEdges: () => {},
-    selectedNodeId: null,
     setSelectedNodeId: () => {},
     zoom: ZOOM_THRESHOLD,
     setZoom: () => {},
@@ -83,6 +86,9 @@ export function getInitial<
     nodes: [],
     nodesMap: {},
     setNodesMap: () => {},
+    selectedNodeId: null,
+    selectedNode: null,
+    currentNodeId: null,
     currentNode: null,
   }
 }
diff --git a/web/common/src/components/Lineage/LineageControlButton.tsx b/web/common/src/components/Lineage/LineageControlButton.tsx
index d3f3d5d215..14c7a6f2de 100644
--- a/web/common/src/components/Lineage/LineageControlButton.tsx
+++ b/web/common/src/components/Lineage/LineageControlButton.tsx
@@ -1,6 +1,6 @@
 import { ControlButton } from '@xyflow/react'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 import { Tooltip } from '../Tooltip/Tooltip'
 
 export function LineageControlButton({
diff --git a/web/common/src/components/Lineage/LineageControlIcon.tsx b/web/common/src/components/Lineage/LineageControlIcon.tsx
index f8bc679c6d..a16f611a63 100644
--- a/web/common/src/components/Lineage/LineageControlIcon.tsx
+++ b/web/common/src/components/Lineage/LineageControlIcon.tsx
@@ -1,6 +1,6 @@
 import React from 'react'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 
 export interface LineageControlIconProps extends React.SVGProps<SVGSVGElement> {
   Icon: React.ElementType
diff --git a/web/common/src/components/Lineage/LineageLayout.tsx b/web/common/src/components/Lineage/LineageLayout.tsx
index a9b5ec512f..e19046780a 100644
--- a/web/common/src/components/Lineage/LineageLayout.tsx
+++ b/web/common/src/components/Lineage/LineageLayout.tsx
@@ -41,6 +41,9 @@ export function LineageLayout<
   useLineage,
   onNodeClick,
   onNodeDoubleClick,
+  showControlOnlySelectedNodes,
+  showControlZoomToCurrentNode,
+  showControlZoomToSelectedNode,
 }: {
   useLineage: LineageContextHook<
     TNodeData,
@@ -58,6 +61,9 @@
   className?: string
   nodesDraggable?: boolean
   nodesConnectable?: boolean
+  showControlOnlySelectedNodes?: boolean
+  showControlZoomToCurrentNode?: boolean
+  showControlZoomToSelectedNode?: boolean
   controls?:
     | React.ReactNode
     | (({ setCenter }: { setCenter: SetCenter }) => React.ReactNode)
@@ -85,6 +91,9 @@
       useLineage={useLineage}
       onNodeClick={onNodeClick}
       onNodeDoubleClick={onNodeDoubleClick}
+      showControlOnlySelectedNodes={showControlOnlySelectedNodes}
+      showControlZoomToCurrentNode={showControlZoomToCurrentNode}
+      showControlZoomToSelectedNode={showControlZoomToSelectedNode}
     />
diff --git a/web/common/src/components/Lineage/LineageLayoutBase.tsx b/web/common/src/components/Lineage/LineageLayoutBase.tsx
index 6d3975d19a..93a55858bb 100644
--- a/web/common/src/components/Lineage/LineageLayoutBase.tsx
+++ b/web/common/src/components/Lineage/LineageLayoutBase.tsx
@@ -44,7 +44,7 @@
 import '@xyflow/react/dist/style.css'
 import './Lineage.css'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 
 export function LineageLayoutBase<
   TNodeData extends LineageNodeData = LineageNodeData,
@@ -65,6 +65,9 @@
   useLineage,
   onNodeClick,
   onNodeDoubleClick,
+  showControlOnlySelectedNodes = true,
+  showControlZoomToCurrentNode = true,
+  showControlZoomToSelectedNode = true,
 }: {
   useLineage: LineageContextHook<
     TNodeData,
@@ -81,6 +84,9 @@
   nodeTypes?: NodeTypes
   edgeTypes?: EdgeTypes
   className?: string
+  showControlOnlySelectedNodes?: boolean
+  showControlZoomToCurrentNode?: boolean
+  showControlZoomToSelectedNode?: boolean
   controls?:
     | React.ReactNode
     | (({ setCenter }: { setCenter: SetCenter }) => React.ReactNode)
@@ -100,8 +106,9 @@
     currentNode,
     zoom,
     nodes: initialNodes,
-    edges: initialEdges,
-    nodesMap,
+    edges,
+    setEdges,
+    selectedNode,
     showOnlySelectedNodes,
     selectedNodeId,
     setZoom,
@@ -111,27 +118,14 @@
     setSelectedEdges,
   } = useLineage()
 
-  const [nodes, setNodes] = React.useState<LineageNode<TNodeData, TNodeID>[]>(
-    [],
-  )
-  const [edges, setEdges] = React.useState<
-    LineageEdge<
-      TEdgeData,
-      TEdgeID,
-      TSourceID,
-      TTargetID,
-      TSourceHandleID,
-      TTargetHandleID
-    >[]
-  >([])
+  const [nodes, setNodes] =
+    React.useState<LineageNode<TNodeData, TNodeID>[]>(initialNodes)
 
   const onNodesChange = React.useCallback(
     (changes: NodeChange<LineageNode<TNodeData, TNodeID>>[]) => {
-      setNodes(
-        applyNodeChanges<LineageNode<TNodeData, TNodeID>>(changes, nodes),
-      )
+      setNodes(applyNodeChanges(changes, nodes))
     },
-    [nodes, setNodes],
+    [nodes],
   )
 
   const onEdgesChange = React.useCallback(
@@ -160,7 +154,7 @@
       >(changes, edges),
       )
     },
-    [edges, setEdges],
+    [edges],
   )
 
   const updateZoom = React.useMemo(() => debounce(setZoom, 200), [setZoom])
 
@@ -174,20 +168,19 @@ const zoomToCurrentNode = React.useCallback(
     (zoom: number = DEFAULT_ZOOM) => {
       if (currentNode) {
         setCenter(currentNode.position.x, currentNode.position.y, {
           zoom,
           duration: 0,
         })
       }
     },
-    [currentNode, setCenter],
+    [currentNode?.position.x, currentNode?.position.y],
   )
 
   const zoomToSelectedNode = React.useCallback(
     (zoom: number = DEFAULT_ZOOM) => {
-      const node = selectedNodeId ? nodesMap[selectedNodeId] : null
-      if (node) {
-        setCenter(node.position.x, node.position.y, {
+      if (selectedNode) {
+        setCenter(selectedNode.position.x, selectedNode.position.y, {
           zoom,
           duration: 0,
         })
       }
     },
-    [nodesMap, selectedNodeId, setCenter],
+    [selectedNode?.position.x, selectedNode?.position.y],
   )
 
   const getAllIncomers = React.useCallback(
@@ -202,13 +195,13 @@
       return Array.from(
         new Set<LineageNode<TNodeData, TNodeID>>([
           node,
-          ...getIncomers(node, nodes, edges)
+          ...getIncomers(node, initialNodes, edges)
             .map(n => getAllIncomers(n, visited))
             .flat(),
         ]),
       )
     },
-    [nodes, edges],
+    [initialNodes, edges],
   )
 
   const getAllOutgoers = React.useCallback(
@@ -223,48 +216,32 @@
       return Array.from(
        new Set<LineageNode<TNodeData, TNodeID>>([
           node,
-          ...getOutgoers(node, nodes, edges)
+          ...getOutgoers(node, initialNodes, edges)
             .map(n => getAllOutgoers(n, visited))
             .flat(),
         ]),
       )
     },
-    [nodes, edges],
+    [initialNodes, edges],
   )
 
-  React.useEffect(() => {
-    setNodes(initialNodes)
-  }, [initialNodes])
+  const connectedNodes = React.useMemo(() => {
+    if (selectedNode == null) return []
 
-  React.useEffect(() => {
-    setEdges(initialEdges)
-  }, [initialEdges])
-
-  React.useEffect(() => {
-    if (selectedNodeId == null) {
-      setShowOnlySelectedNodes(false)
-      setSelectedNodes(new Set())
-      setSelectedEdges(new Set())
-
-      return
-    }
-
-    const node = selectedNodeId ? nodesMap[selectedNodeId] : null
-
-    if (node == null) {
-      setSelectedNodeId(null)
-      return
-    }
-
-    const incomers = getAllIncomers(node)
-    const outgoers = getAllOutgoers(node)
-    const connectedNodes = [...incomers, ...outgoers]
+    const all = [
+      ...getAllIncomers(selectedNode),
+      ...getAllOutgoers(selectedNode),
+    ]
 
     if (currentNode) {
-      connectedNodes.push(currentNode)
+      all.push(currentNode)
     }
 
-    const connectedEdges = getConnectedEdges<
+    return all
+  }, [selectedNode, currentNode, getAllIncomers, getAllOutgoers])
+
+  const connectedEdges = React.useMemo(() => {
+    return getConnectedEdges<
       LineageNode,
       LineageEdge<
         TEdgeData,
         TEdgeID,
         TSourceID,
         TTargetID,
         TSourceHandleID,
         TTargetHandleID
       >
     >(connectedNodes, edges)
+  }, [connectedNodes, edges])
+
+  React.useEffect(() => {
+    setNodes(initialNodes)
+  }, [initialNodes])
+
+  React.useEffect(() => {
+    if (selectedNodeId == null) {
+      setShowOnlySelectedNodes(false)
+      setSelectedNodes(new Set())
+      setSelectedEdges(new Set())
+    } else {
+      if (selectedNode == null) {
+        setSelectedNodeId(null)
+      }
+    }
+  }, [selectedNodeId, selectedNode])
+
+  React.useEffect(() => {
     const selectedNodes = new Set(connectedNodes.map(node => node.id))
     const selectedEdges = new Set(
       connectedEdges.reduce((acc, edge) => {
@@ -294,24 +290,11 @@
 
     setSelectedNodes(selectedNodes)
     setSelectedEdges(selectedEdges)
-  }, [
-    currentNode,
-    selectedNodeId,
-    setSelectedNodes,
-    setSelectedEdges,
-    getAllIncomers,
-    getAllOutgoers,
-    setShowOnlySelectedNodes,
-    setSelectedNodeId,
-  ])
+  }, [connectedNodes, connectedEdges])
 
   React.useEffect(() => {
-    if (selectedNodeId) {
-      zoomToSelectedNode(zoom)
-    } else {
-      zoomToCurrentNode(zoom)
-    }
-  }, [zoomToCurrentNode, zoomToSelectedNode])
+    zoomToSelectedNode()
+  }, [zoomToSelectedNode])
 
   React.useEffect(() => {
     updateZoom(viewportZoom)
@@ -363,7 +346,7 @@
           position="top-right"
           className="m-1 border-2 border-lineage-control-border rounded-sm overflow-hidden"
         >
-          {currentNode && (
+          {currentNode && showControlZoomToCurrentNode && (
             <LineageControlButton
               onClick={() => zoomToCurrentNode(DEFAULT_ZOOM)}
             >
               <LineageControlIcon Icon={…} />
             </LineageControlButton>
           )}
           {selectedNodeId && (
             <>
-              <LineageControlButton
-                onClick={() => setShowOnlySelectedNodes(!showOnlySelectedNodes)}
-              >
-                <LineageControlIcon Icon={…} />
-              </LineageControlButton>
-              <LineageControlButton
-                onClick={() => zoomToSelectedNode(DEFAULT_ZOOM)}
-              >
-                <LineageControlIcon Icon={…} />
-              </LineageControlButton>
+              {showControlOnlySelectedNodes && (
+                <LineageControlButton
+                  onClick={() => setShowOnlySelectedNodes(!showOnlySelectedNodes)}
+                >
+                  <LineageControlIcon Icon={…} />
+                </LineageControlButton>
+              )}
+              {showControlZoomToSelectedNode && (
+                <LineageControlButton
+                  onClick={() => zoomToSelectedNode(DEFAULT_ZOOM)}
+                >
+                  <LineageControlIcon Icon={…} />
+                </LineageControlButton>
+              )}
             </>
           )}
           {controls && typeof controls === 'function'
diff --git a/web/common/src/components/Lineage/LineageLayoutContainer.tsx b/web/common/src/components/Lineage/LineageLayoutContainer.tsx
index 4bd0d42402..2ba0e00d56 100644
--- a/web/common/src/components/Lineage/LineageLayoutContainer.tsx
+++ b/web/common/src/components/Lineage/LineageLayoutContainer.tsx
@@ -1,4 +1,4 @@
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 import React from 'react'
diff --git a/web/common/src/components/Lineage/layout/dagreLayout.ts b/web/common/src/components/Lineage/layout/dagreLayout.ts
index 554d427f03..d7a5c01e2e 100644
--- a/web/common/src/components/Lineage/layout/dagreLayout.ts
+++ b/web/common/src/components/Lineage/layout/dagreLayout.ts
@@ -36,11 +36,7 @@ export function buildLayout<
   const nodeCount = nodes.length
   const edgeCount = edges.length
 
-  if (nodeCount === 0)
-    return {
-      edges: [],
-      nodesMap: {},
-    }
+  if (nodeCount === 0) return {}
 
   const g = new dagre.graphlib.Graph({
     compound: true,
@@ -92,8 +88,5 @@
     }
   }
 
-  return {
-    edges,
-    nodesMap,
-  }
+  return { ...nodesMap }
 }
diff --git a/web/common/src/components/Lineage/node/NodeAppendix.tsx b/web/common/src/components/Lineage/node/NodeAppendix.tsx
index 5a703a468f..48194c1442 100644
--- a/web/common/src/components/Lineage/node/NodeAppendix.tsx
+++ b/web/common/src/components/Lineage/node/NodeAppendix.tsx
@@ -1,7 +1,7 @@
 import { cva, type VariantProps } from 'class-variance-authority'
 import { forwardRef, type HTMLAttributes } from 'react'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 
 const appendixVariants = cva(
   'node-appendix absolute flex w-full flex-col items-center',
diff --git a/web/common/src/components/Lineage/node/NodeBadge.tsx b/web/common/src/components/Lineage/node/NodeBadge.tsx
index 8c894ecca2..b05283dfa8 100644
--- a/web/common/src/components/Lineage/node/NodeBadge.tsx
+++ b/web/common/src/components/Lineage/node/NodeBadge.tsx
@@ -1,7 +1,7 @@
 import React from 'react'
 
-import { cn } from '@/utils'
-import { Badge, type BadgeProps } from '@/components/Badge/Badge'
+import { cn } from '@sqlmesh-common/utils'
+import { Badge, type BadgeProps } from '@sqlmesh-common/components/Badge/Badge'
 
 export const NodeBadge = React.forwardRef(
   ({ className, children, ...props }, ref) => {
diff --git a/web/common/src/components/Lineage/node/NodeBase.tsx b/web/common/src/components/Lineage/node/NodeBase.tsx
index 78033a4099..89342d83c8 100644
--- a/web/common/src/components/Lineage/node/NodeBase.tsx
+++ b/web/common/src/components/Lineage/node/NodeBase.tsx
@@ -1,8 +1,8 @@
 import { type NodeProps } from '@xyflow/react'
 import React from 'react'
 
-import { BaseNode } from '@/components/Lineage/node/base-node'
-import { cn } from '@/utils'
+import { BaseNode } from '@sqlmesh-common/components/Lineage/node/base-node'
+import { cn } from '@sqlmesh-common/utils'
 
 export interface NodeBaseProps extends NodeProps {
   className?: string
diff --git a/web/common/src/components/Lineage/node/NodeContainer.tsx b/web/common/src/components/Lineage/node/NodeContainer.tsx
index 0506771eae..c72d60e4ed 100644
--- a/web/common/src/components/Lineage/node/NodeContainer.tsx
+++ b/web/common/src/components/Lineage/node/NodeContainer.tsx
@@ -1,7 +1,7 @@
 import React from 'react'
 
-import { cn } from '@/utils'
-import { VerticalContainer } from '@/components/VerticalContainer/VerticalContainer'
+import { cn } from '@sqlmesh-common/utils'
+import { VerticalContainer } from '@sqlmesh-common/components/VerticalContainer/VerticalContainer'
 
 export const NodeContainer = React.forwardRef<
   HTMLDivElement,
diff --git a/web/common/src/components/Lineage/node/NodeDetail.tsx b/web/common/src/components/Lineage/node/NodeDetail.tsx
index 61e22d169c..f57978d865 100644
--- a/web/common/src/components/Lineage/node/NodeDetail.tsx
+++ b/web/common/src/components/Lineage/node/NodeDetail.tsx
@@ -1,4 +1,4 @@
-import { Metadata, cn } from '@tobikodata/sqlmesh-common'
+import { Metadata, cn } from '@sqlmesh-common/index'
 
 import { NodeDivider } from './NodeDivider'
diff --git a/web/common/src/components/Lineage/node/NodeHandle.tsx b/web/common/src/components/Lineage/node/NodeHandle.tsx
index d50d90422a..6e7aa4dd22 100644
--- a/web/common/src/components/Lineage/node/NodeHandle.tsx
+++ b/web/common/src/components/Lineage/node/NodeHandle.tsx
@@ -1,7 +1,7 @@
 import { Position } from '@xyflow/react'
 import React from 'react'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 import { BaseHandle } from './base-handle'
 import type { HandleId } from '../utils'
diff --git a/web/common/src/components/Lineage/node/NodeHandleIcon.tsx b/web/common/src/components/Lineage/node/NodeHandleIcon.tsx
index b55d96a041..caafa617a9 100644
--- a/web/common/src/components/Lineage/node/NodeHandleIcon.tsx
+++ b/web/common/src/components/Lineage/node/NodeHandleIcon.tsx
@@ -1,6 +1,6 @@
 import { ArrowRight } from 'lucide-react'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 
 export function NodeHandleIcon({
   className,
diff --git a/web/common/src/components/Lineage/node/NodeHandles.tsx b/web/common/src/components/Lineage/node/NodeHandles.tsx
index 453ff74317..3d7d6e08ab 100644
--- a/web/common/src/components/Lineage/node/NodeHandles.tsx
+++ b/web/common/src/components/Lineage/node/NodeHandles.tsx
@@ -1,7 +1,7 @@
 import React from 'react'
 
-import { cn } from '@/utils'
-import { HorizontalContainer } from '@/components/HorizontalContainer/HorizontalContainer'
+import { cn } from '@sqlmesh-common/utils'
+import { HorizontalContainer } from '@sqlmesh-common/components/HorizontalContainer/HorizontalContainer'
 
 import { NodeHandle } from './NodeHandle'
 import type { HandleId } from '../utils'
diff --git a/web/common/src/components/Lineage/node/NodeHeader.tsx b/web/common/src/components/Lineage/node/NodeHeader.tsx
index 154dc166de..41e83aaa4e 100644
--- a/web/common/src/components/Lineage/node/NodeHeader.tsx
+++ b/web/common/src/components/Lineage/node/NodeHeader.tsx
@@ -1,6 +1,6 @@
 import { type HTMLAttributes, forwardRef } from 'react'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 
 /* NODE HEADER -------------------------------------------------------------- */
diff --git a/web/common/src/components/Lineage/node/NodePort.tsx b/web/common/src/components/Lineage/node/NodePort.tsx
index 7380716f02..207be24576 100644
--- a/web/common/src/components/Lineage/node/NodePort.tsx
+++ b/web/common/src/components/Lineage/node/NodePort.tsx
@@ -1,7 +1,7 @@
-import { useNodeConnections, useUpdateNodeInternals } from '@xyflow/react'
+import { useNodeConnections } from '@xyflow/react'
 import React from 'react'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 import { type NodeId, type PortHandleId } from '../utils'
 
 import { NodeHandles } from './NodeHandles'
@@ -21,8 +21,6 @@
   className?: string
   children: React.ReactNode
 }) {
-  const updateNodeInternals = useUpdateNodeInternals()
-
   const sources = useNodeConnections({
     id: nodeId,
     handleType: 'source',
@@ -45,12 +43,6 @@
   const leftId = isLeftHandleId(id) ? id : undefined
   const rightId = isRightHandleId(id) ? id : undefined
 
-  React.useEffect(() => {
-    if (leftId || rightId) {
-      updateNodeInternals(nodeId)
-    }
-  }, [updateNodeInternals, nodeId, leftId, rightId])
-
   return (
     <NodeHandles
       data-component="NodePort"
diff --git a/web/common/src/components/Lineage/node/NodePorts.tsx b/web/common/src/components/Lineage/node/NodePorts.tsx
index f417dea9e4..1f40dc764f 100644
--- a/web/common/src/components/Lineage/node/NodePorts.tsx
+++ b/web/common/src/components/Lineage/node/NodePorts.tsx
@@ -1,6 +1,6 @@
-import { cn } from '@/utils'
-import { VirtualList } from '@/components/VirtualList/VirtualList'
-import { FilterableList } from '@/components/VirtualList/FilterableList'
+import { cn } from '@sqlmesh-common/utils'
+import { VirtualList } from '@sqlmesh-common/components/VirtualList/VirtualList'
+import { FilterableList } from '@sqlmesh-common/components/VirtualList/FilterableList'
 import type { IFuseOptions } from 'fuse.js'
 
 export function NodePorts({
diff --git a/web/common/src/components/Lineage/node/base-handle.tsx b/web/common/src/components/Lineage/node/base-handle.tsx
index e6b8f0c24b..0ce6a98745 100644
--- a/web/common/src/components/Lineage/node/base-handle.tsx
+++ b/web/common/src/components/Lineage/node/base-handle.tsx
@@ -2,7 +2,7 @@ import { Handle, type HandleProps } from '@xyflow/react'
 import { forwardRef } from 'react'
 import type { ForwardRefExoticComponent, RefAttributes } from 'react'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 
 export const BaseHandle: ForwardRefExoticComponent<
   HandleProps & RefAttributes<HTMLDivElement>
diff --git a/web/common/src/components/Lineage/node/base-node.tsx b/web/common/src/components/Lineage/node/base-node.tsx
index d349ca601a..f1b5c7d509 100644
--- a/web/common/src/components/Lineage/node/base-node.tsx
+++ b/web/common/src/components/Lineage/node/base-node.tsx
@@ -1,6 +1,6 @@
 import { type HTMLAttributes, forwardRef } from 'react'
 
-import { cn } from '@/utils'
+import { cn } from '@sqlmesh-common/utils'
 
 export const BaseNode = forwardRef<
   HTMLDivElement,
diff --git a/web/common/src/components/Lineage/stories/Lineage.stories.tsx b/web/common/src/components/Lineage/stories/Lineage.stories.tsx
index 115be3c2c0..76c4229250 100644
--- a/web/common/src/components/Lineage/stories/Lineage.stories.tsx
+++ b/web/common/src/components/Lineage/stories/Lineage.stories.tsx
@@ -129,7 +129,7 @@ export const LineageModel = () => {
       }}
     >