Add dry_run method to base adapter with implementation for SQLAdapters
resolves #7839

In the CLI integration, MetricFlow will issue dry run queries as
part of its warehouse-level validation of the semantic manifest,
including all semantic model and metric definitions.

In most cases, issuing an `explain` query is adequate. However,
BigQuery does not support the `explain` keyword, so we cannot
simply prepend `explain` to our input queries and expect the
correct behavior across all contexts.

This commit adds a dry_run() method to the BaseAdapter which mirrors
the execute() method in that it simply delegates to the ConnectionManager.
It also adds a working implementation to the SQLConnectionManager and
includes a few test cases for adapter maintainers to try out on their own.

The current implementation should work out of the box with most
of our adapters. BigQuery will require us to implement the dry_run
method on the BigQueryConnectionManager, and community-maintained
adapters can opt in by enabling the test and ensuring their own
implementations work as expected.
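
For context, a rough sketch of what that BigQuery override might look
like (an illustration only; it assumes the connection handle is a
google.cloud.bigquery.Client, and the real implementation may differ):

    from google.cloud import bigquery
    from dbt.adapters.base import BaseConnectionManager
    from dbt.contracts.connection import AdapterResponse

    class BigQueryConnectionManager(BaseConnectionManager):  # sketch
        TYPE = "bigquery"

        def dry_run(self, sql: str) -> AdapterResponse:
            client: bigquery.Client = self.get_thread_connection().handle
            job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
            # dry_run=True validates the query and estimates cost without
            # executing it; invalid SQL raises a google.api_core error.
            job = client.query(sql, job_config=job_config)
            return AdapterResponse(_message=f"Dry run: {job.total_bytes_processed} bytes processed")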

Note - we decided to make these concrete methods that throw runtime
exceptions for direct descendants of BaseAdapter in order to avoid
forcing community adapter maintainers to implement a method that does
not currently have any use cases in dbt proper.
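
In miniature, the trade-off that motivated this choice (a sketch with
invented class names, not the shipped code):

    import abc

    class StrictManager(metaclass=abc.ABCMeta):
        @abc.abstractmethod
        def dry_run(self, sql: str):
            """Abstract: every subclass must implement this to instantiate."""
            ...

    class LenientManager:
        def dry_run(self, sql: str):
            # Concrete but unimplemented: existing community adapters keep
            # working unchanged, and only a direct call to dry_run raises.
            raise NotImplementedError("`dry_run` is not implemented for this adapter!")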
tlento committed Jun 30, 2023
commit 3a99352d06e7a5ae17175049dfbbac31f24a7ed6
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230629-175712.yaml
@@ -0,0 +1,6 @@
kind: Features
body: Add dry_run method to BaseAdapter with implementation for SQLAdapter
time: 2023-06-29T17:57:12.599313-07:00
custom:
Author: tlento
Issue: "7839"
12 changes: 12 additions & 0 deletions core/dbt/adapters/base/connections.py
@@ -65,6 +65,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
- commit
- clear_transaction
- execute
- dry_run

You must also set the 'TYPE' class attribute with a class-unique constant
string.
@@ -412,3 +413,14 @@ def execute(
:rtype: Tuple[AdapterResponse, agate.Table]
"""
raise dbt.exceptions.NotImplementedError("`execute` is not implemented for this adapter!")

def dry_run(self, sql: str) -> AdapterResponse:
Contributor:

Since 'run' generally refers to a dbt run command (imagine if we develop a --dry-run flag for dbt at some point), should we just call this 'explain' or 'validate_sql'?

Contributor Author:

dry_run and validate_sql both seem fine to me - the former references flag settings in BigQuery in ways that set user expectations in accordance with what we're planning to provide (i.e., the BigQueryResponse which appears to have all of the cost estimate parameters in place), while the latter is better differentiated from dbt core commands.

explain implies that we'll return an explain plan, which isn't currently the case. It seemed simplest not to use explain for that reason and instead get something in place we could use and evolve, but I'm open to calling this explain and adding explain result fetching later. We could make this return an ExplainResponse that is a subtype of AdapterResponse for backwards compatibility.

@jtcohen6 any opinions?
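
(For illustration, the ExplainResponse idea might look like the sketch
below; the field name is hypothetical:)

    from dataclasses import dataclass
    from typing import Optional

    from dbt.contracts.connection import AdapterResponse

    @dataclass
    class ExplainResponse(AdapterResponse):
        # A subtype keeps backwards compatibility: callers expecting a plain
        # AdapterResponse still work, while explain output rides along.
        explain_plan: Optional[str] = None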

Contributor:

validate_sql makes sense to me! That captures the narrower goal of this method for now.

"""Submit the given SQL to the engine for validation, but not execution.

This should throw an appropriate exception if the input SQL is invalid, although
in practice that will generally be handled by delegating to execute() and
allowing the error handler to take care of the rest.

:param str sql: The sql to validate
"""
raise dbt.exceptions.NotImplementedError("`dry_run` is not implemented for this adapter!")
12 changes: 9 additions & 3 deletions core/dbt/adapters/base/impl.py
@@ -289,6 +289,15 @@ def execute(
"""
return self.connections.execute(sql=sql, auto_begin=auto_begin, fetch=fetch, limit=limit)

def dry_run(self, sql: str) -> AdapterResponse:
"""Submit the given SQL for validation, but not execution.

This is a thin wrapper around ConnectionManager.dry_run.

:param str sql: The sql to validate
"""
return self.connections.dry_run(sql=sql)

@available.parse(lambda *a, **k: [])
def get_column_schema_from_query(self, sql: str) -> List[BaseColumn]:
"""Get a list of the Columns with names and data types from the given sql."""
@@ -785,7 +794,6 @@ def _make_match(
schema: str,
identifier: str,
) -> List[BaseRelation]:

Contributor Author:

Weird, must be some auto-formatter somewhere. :/

matches = []

search = self._make_match_kwargs(database, schema, identifier)
@@ -1063,7 +1071,6 @@ def _get_one_catalog(
schemas: Set[str],
manifest: Manifest,
) -> agate.Table:

kwargs = {"information_schema": information_schema, "schemas": schemas}
table = self.execute_macro(
GET_CATALOG_MACRO_NAME,
@@ -1453,7 +1460,6 @@ def render_model_constraint(cls, constraint: ModelLevelConstraint) -> Optional[s
def catch_as_completed(
futures, # typing: List[Future[agate.Table]]
) -> Tuple[agate.Table, List[Exception]]:

# catalogs: agate.Table = agate.Table(rows=[])
tables: List[agate.Table] = []
exceptions: List[Exception] = []
13 changes: 12 additions & 1 deletion core/dbt/adapters/sql/connections.py
@@ -52,7 +52,6 @@ def add_query(
bindings: Optional[Any] = None,
abridge_sql_log: bool = False,
) -> Tuple[Connection, Any]:

connection = self.get_thread_connection()
if auto_begin and connection.transaction_open is False:
self.begin()
@@ -152,6 +151,18 @@ def execute(
table = dbt.clients.agate_helper.empty_table()
return response, table

def dry_run(self, sql: str) -> AdapterResponse:
"""Submit the given SQL to the engine for validation, but not execution.

By default we simply prefix the query with the explain keyword and allow the
exceptions thrown by the underlying engine on invalid SQL inputs to bubble up
to the exception handler.

:param str sql: The sql to validate
"""
explain_sql = f"explain {sql}"
Contributor:

Generally we would wrap this in a Jinja macro, and call that macro here (via execute_macro) instead of a Python f-string. Something like:

{% macro get_explain_sql(query) %}
    explain {{ query }}
{% endmacro %}

That way it's "user space" code that someone could override if they wanted to. Including an adapter maintainer, if the only thing that's different on their data platform is (e.g.) a synonym for explain.

There are legitimate cases for using Python templating over Jinja templating: it's easier to write and to unit-test. In cases where we believe the code is tricky to write and functionally uncontroversial, such that no user would realistically want or need to override it, that approach can make more sense.

Not saying we need to go one way or the other — just offering my perspective & context on how we've done this in the past.

Contributor Author:

I've added a commit to switch this to macros instead. It seems like it works.

The downside of macros is that user-land code gets coupled into our dry run logic, and people could conceivably override the macro to execute queries instead of doing a dry run or explain or whatever.

Whether or not that matters really depends on how we think about this method. If it's a low level primitive we probably want to assert more control (which is how I was approaching it originally). If it's an accessor that we want to be closely tied in to the rest of dbt's workings then I suspect the current macro-based approach is the better one.

You all have far more experience with dbt-core and knowledge of where it might be heading, so I'll defer to you on which approach we should take here.

return self.execute(explain_sql, auto_begin=True)[0]

def add_begin_query(self):
return self.add_query("BEGIN", auto_begin=False)

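
For reference, the macro-based variant discussed in the thread above
might look roughly like the following sketch (hypothetical: it assumes
the rendering moves to the adapter layer, where execute_macro is
available, and that a get_explain_sql macro like the one suggested
above exists):

    # On the adapter (impl.py), not the connection manager:
    def dry_run(self, sql: str) -> AdapterResponse:
        # Render "explain {{ query }}" via a user-overridable Jinja macro.
        explain_sql = self.execute_macro("get_explain_sql", kwargs={"query": sql})
        return self.execute(explain_sql, auto_begin=True)[0]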
66 changes: 66 additions & 0 deletions tests/adapter/dbt/tests/adapter/utils/test_dry_run.py
@@ -0,0 +1,66 @@
from typing import Type

import pytest

from dbt.exceptions import DbtRuntimeError
from dbt.adapters.base.impl import BaseAdapter


class BaseDryRunMethod:
"""Tests the behavior of the dry run method for the relevant adapters.

The valid and invalid SQL should work with most engines by default, but
both inputs can be overridden as needed for a given engine to get the correct
behavior.

The base method is meant to throw the appropriate custom exception when dry_run
fails.
"""

@pytest.fixture(scope="class")
def valid_sql(self) -> str:
"""Returns a valid statement for issuing as a dry run query.

Ideally this would be checkable for non-execution. For example, we could use a
CREATE TABLE statement with an assertion that no table was created. However,
for most adapter types this is unnecessary - the EXPLAIN keyword has exactly the
behavior we want, and here we are essentially testing to make sure it is
supported. As such, we return a simple SELECT query, and leave it to
engine-specific test overrides to specify more detailed behavior as appropriate.
"""

return "select 1"

@pytest.fixture(scope="class")
def invalid_sql(self) -> str:
"""Returns an invalid statement for issuing a bad dry run query."""

return "Let's run some invalid SQL and see if we get an error!"

@pytest.fixture(scope="class")
def expected_exception(self) -> Type[Exception]:
"""Returns the Exception type thrown by a failed query.

Defaults to dbt.exceptions.DbtRuntimeError because that is the most common
base exception for adapters to throw."""
return DbtRuntimeError

def test_valid_dry_run(self, adapter: BaseAdapter, valid_sql: str) -> None:
"""Executes a dry run query on valid SQL. No news is good news."""
with adapter.connection_named("test_valid_dry_run"):
adapter.dry_run(valid_sql)

def test_invalid_dry_run(
self,
adapter: BaseAdapter,
invalid_sql: str,
expected_exception: Type[Exception],
) -> None:
"""Executes a dry run query on invalid SQL, expecting the exception."""
with pytest.raises(expected_exception):
with adapter.connection_named("test_invalid_dry_run"):
adapter.dry_run(invalid_sql)


class TestDryRunMethod(BaseDryRunMethod):
pass
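
For example, an adapter repository opting in might subclass the base
test and override the fixtures with engine-specific values (the names
here are hypothetical):

    class TestDryRunMethodMyAdapter(BaseDryRunMethod):
        @pytest.fixture(scope="class")
        def invalid_sql(self) -> str:
            # Engines without EXPLAIN support may need a different flavor
            # of invalid input to trigger a validation error.
            return "select * from a_table_that_should_not_exist"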