diff --git a/core/dbt/compilation.py b/core/dbt/compilation.py
index 7163b669001..61e0f77a3f4 100644
--- a/core/dbt/compilation.py
+++ b/core/dbt/compilation.py
@@ -33,6 +33,8 @@
 from dbt.events.format import pluralize
 import dbt.tracking
 
+from .language_provider import IbisProvider
+
 graph_file_name = "graph.gpickle"
 
 
@@ -369,7 +371,14 @@ def _compile_node(
         )
         compiled_node = _compiled_type_for(node).from_dict(data)
 
-        if compiled_node.language == ModelLanguage.python:
+        if compiled_node.language == ModelLanguage.ibis:
+            # Ibis code is compiled straight to SQL here, so the node is treated
+            # as a plain SQL node from this point on.
+            provider = IbisProvider()
+            context = self._create_node_context(compiled_node, manifest, extra_context)
+            compiled_node.compiled_code = provider.compile(node.raw_code, context)
+            compiled_node.language = ModelLanguage.sql
+        elif compiled_node.language == ModelLanguage.python:
             # TODO could we also 'minify' this code at all? just aesthetic, not functional
 
             # quoating seems like something very specific to sql so far
diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py
index 93f12a1411e..25fed0d56a6 100644
--- a/core/dbt/contracts/files.py
+++ b/core/dbt/contracts/files.py
@@ -194,6 +194,7 @@ class SourceFile(BaseSourceFile):
     docs: List[str] = field(default_factory=list)
     macros: List[str] = field(default_factory=list)
     env_vars: List[str] = field(default_factory=list)
+    language: str = "sql"
 
     @classmethod
     def big_seed(cls, path: FilePath) -> "SourceFile":
diff --git a/core/dbt/dbt_ibis/_dbt_ibis.py b/core/dbt/dbt_ibis/_dbt_ibis.py
new file mode 100644
index 00000000000..6e00eeb3cca
--- /dev/null
+++ b/core/dbt/dbt_ibis/_dbt_ibis.py
@@ -0,0 +1,40 @@
+"""
+adapted from https://github.com/dbt-labs/dbt-core/pull/5982/files
+"""
+import ibis
+
+
+def compile(code: str, context):
+    """Execute ibis model code and return the SQL compiled from its ``model``.
+
+    ``code`` runs with ``ibis``, ``context``, ``conn_params`` and the
+    ``ibis.connect`` result ``s`` in scope, and must bind the final ibis
+    expression to a variable named ``model``.
+    Raises NameError when ``code`` does not define ``model``.
+    """
+    target = context["target"]
+
+    conn_params = {
+        "account": target["account"],
+        "user": target["user"],
+        "role": target["role"],
+        "warehouse": target["warehouse"],
+        "database": target["database"],
+        "schema": target["schema"],
+        "authenticator": "externalbrowser",
+    }
+
+    s = ibis.connect(
+        f"snowflake://{conn_params['user']}:_@{conn_params['account']}/{conn_params['database']}/{conn_params['schema']}?warehouse={conn_params['warehouse']}&role={conn_params['role']}&authenticator={conn_params['authenticator']}",
+    )
+
+    # SECURITY: exec() runs arbitrary model code; only compile trusted projects.
+    # A single explicit namespace is used because names bound by a bare exec()
+    # inside a function are not reliably visible afterwards (PEP 667 makes them
+    # invisible on Python 3.13+), which would break the lookup of ``model``.
+    namespace = {"ibis": ibis, "context": context, "conn_params": conn_params, "s": s}
+    exec(code, namespace)
+    if "model" not in namespace:
+        raise NameError("ibis model code must define a variable named 'model'")
+
+    backend = getattr(ibis, target["type"])
+    return str(backend.compile(namespace["model"]))
diff --git a/core/dbt/graph/selector_spec.py b/core/dbt/graph/selector_spec.py
index 991ae7fcb89..425f3807aac 100644
--- a/core/dbt/graph/selector_spec.py
+++ b/core/dbt/graph/selector_spec.py
@@ -80,7 +80,7 @@ def __post_init__(self):
     def default_method(cls, value: str) -> MethodName:
         if _probably_path(value):
             return MethodName.Path
-        elif value.lower().endswith((".sql", ".py", ".csv")):
+        elif value.lower().endswith((".sql", ".py", ".csv", ".ibis")):
             return MethodName.File
         else:
             return MethodName.FQN
diff --git a/core/dbt/language_provider.py b/core/dbt/language_provider.py
new file mode 100644
index 00000000000..ccae2a6869b
--- /dev/null
+++ b/core/dbt/language_provider.py
@@ -0,0 +1,28 @@
+from typing import Any, Dict
+
+
+class LanguageProvider:
+    """Base interface for model-language compilers."""
+
+    def compile(self, code: str, context: Dict[str, Any]) -> str:
+        """
+        Compile the given code with the node context and return it as a string.
+        """
+        raise NotImplementedError("compile")
+
+
+class IbisProvider(LanguageProvider):
+    def __init__(self) -> None:
+        # TODO: Uncomment when dbt-ibis is released
+        # if not dbt_ibis:
+        #     raise ImportError(
+        #         "dbt_ibis is required and not found; try running `pip install dbt-ibis`"
+        #     )
+        pass
+
+    def compile(self, code: str, context: Dict[str, Any]) -> str:
+        """Compile ibis model code to a SQL string."""
+        # Imported here so ``ibis`` is only loaded when an ibis model is compiled.
+        from .dbt_ibis import _dbt_ibis as dbt_ibis
+
+        return dbt_ibis.compile(code, context)
diff --git a/core/dbt/node_types.py b/core/dbt/node_types.py
index a6fa5ff4f84..46dd194014a 100644
--- a/core/dbt/node_types.py
+++ b/core/dbt/node_types.py
@@ -68,3 +68,4 @@ class RunHookType(StrEnum):
 class ModelLanguage(StrEnum):
     python = "python"
     sql = "sql"
+    ibis = "ibis"
diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py
index b6d349803f6..1e4a646701a 100644
--- a/core/dbt/parser/base.py
+++ b/core/dbt/parser/base.py
@@ -157,7 +157,7 @@ def _mangle_hooks(self, config):
                 config[key] = [hooks.get_hook_dict(h) for h in config[key]]
 
     def _create_error_node(
-        self, name: str, path: str, original_file_path: str, raw_code: str, language: str = "sql"
+        self, name: str, path: str, original_file_path: str, raw_code: str, language: str
     ) -> UnparsedNode:
         """If we hit an error before we've actually parsed a node, provide some
         level of useful information by attaching this to the exception.
@@ -189,7 +189,9 @@ def _create_parsetime_node(
         """
         if name is None:
             name = block.name
-        if block.path.relative_path.endswith(".py"):
+        if block.path.relative_path.endswith(".ibis"):
+            language = ModelLanguage.ibis
+        elif block.path.relative_path.endswith(".py"):
             language = ModelLanguage.python
         else:
             # this is not ideal but we have a lot of tests to adjust if don't do it
@@ -223,6 +225,7 @@ def _create_parsetime_node(
                 path=path,
                 original_file_path=block.path.original_file_path,
                 raw_code=block.contents,
+                language=language,
             )
             raise ParsingException(msg, node=node)
 
diff --git a/core/dbt/parser/models.py b/core/dbt/parser/models.py
index aaf6a0d016e..91c50af0c7c 100644
--- a/core/dbt/parser/models.py
+++ b/core/dbt/parser/models.py
@@ -31,7 +31,6 @@
 from dbt.dataclass_schema import ValidationError
 from dbt.exceptions import ParsingException, validator_error_message, UndefinedMacroException
 
-
 dbt_function_key_words = set(["ref", "source", "config", "get"])
 dbt_function_full_names = set(["dbt.ref", "dbt.source", "dbt.config", "dbt.config.get"])
 
diff --git a/core/dbt/parser/read_files.py b/core/dbt/parser/read_files.py
index ccb6b1b0790..0f9c2ce3702 100644
--- a/core/dbt/parser/read_files.py
+++ b/core/dbt/parser/read_files.py
@@ -175,7 +175,7 @@ def read_files(project, files, parser_files, saved_files):
         project,
         files,
         project.model_paths,
-        [".sql", ".py"],
+        [".sql", ".py", ".ibis"],
         ParseFileType.Model,
         saved_files,
         dbt_ignore_spec,
diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py
index 21521c85e53..fd8d87fc546 100644
--- a/core/dbt/parser/schemas.py
+++ b/core/dbt/parser/schemas.py
@@ -270,6 +270,7 @@ def get_hashable_md(data: Union[str, int, float, List, Dict]) -> Union[str, List
                 path=path,
                 original_file_path=target.original_file_path,
                 raw_code=raw_code,
+                language="sql",
             )
             raise ParsingException(msg, node=node) from exc
 