-
Notifications
You must be signed in to change notification settings - Fork 380
execution context fixes #20 #34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
1f76d48
3059f79
2d4c701
24dbe57
23a929b
7fbf430
37c0b77
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -50,7 +50,7 @@ | |
| from sqlmesh.core.context_diff import ContextDiff | ||
| from sqlmesh.core.dag import DAG | ||
| from sqlmesh.core.dialect import extend_sqlglot, format_model_expressions | ||
| from sqlmesh.core.engine_adapter import EngineAdapter | ||
| from sqlmesh.core.engine_adapter import DF, EngineAdapter | ||
| from sqlmesh.core.environment import Environment | ||
| from sqlmesh.core.macros import macro | ||
| from sqlmesh.core.model import Model | ||
|
|
@@ -69,10 +69,51 @@ | |
| if t.TYPE_CHECKING: | ||
| import graphviz | ||
|
|
||
| MODEL_OR_SNAPSHOT = t.Union[str, Model, Snapshot] | ||
|
|
||
| extend_sqlglot() | ||
|
|
||
|
|
||
| class Context: | ||
| class ExecutionContext: | ||
| """The minimal context needed in order to execute a query. | ||
| Args: | ||
| engine_adapter: The engine adapter to execute queries against. | ||
| mapping: A mapping of models to physical tables. | ||
| """ | ||
|
|
||
| def __init__(self, engine_adapter: EngineAdapter, mapping: t.Dict[str, str]): | ||
| self.engine_adapter = engine_adapter | ||
| self.spark = self.engine_adapter.spark | ||
|
vchan marked this conversation as resolved.
Outdated
|
||
| self._mapping = mapping | ||
|
|
||
| @property | ||
| def mapping(self) -> t.Dict[str, str]: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [Nitpick] TBH I really dislike the name
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. changed to model_tables |
||
| return self._mapping | ||
|
|
||
| def table(self, model_name: str) -> str: | ||
| """Gets the physical table name for a given model. | ||
|
|
||
| Args: | ||
| model_name: The model name. | ||
|
|
||
| Returns: | ||
| The physical table name. | ||
| """ | ||
| return self.mapping[model_name] | ||
|
|
||
| def fetchdf(self, query: t.Union[exp.Expression, str]) -> DF: | ||
| """Fetches a dataframe given a sql string or sqlglot expression. | ||
|
|
||
| Args: | ||
| query: SQL string or sqlglot expression. | ||
|
|
||
| Returns: | ||
| The default dataframe is Pandas, but for Spark a PySpark dataframe is returned. | ||
| """ | ||
| return self.engine_adapter.fetchdf(query) | ||
|
|
||
|
|
||
| class Context(ExecutionContext): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The way we use the base class here is quite sketchy and can lead to unintended consequences. For example we never invoke the base class constructor and only rely on method overriding hoping it would just do the right thing. As the code evolves custom initialization can be added to the constructor of I'd rather have an |
||
| """Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks. | ||
|
|
||
| Args: | ||
|
|
@@ -303,9 +344,22 @@ def snapshots(self) -> t.Dict[str, Snapshot]: | |
| snapshots[model.name] = snapshot | ||
| return snapshots | ||
|
|
||
| @property | ||
| def mapping(self) -> t.Dict[str, str]: | ||
| """Mapping of model name to physical table name. | ||
|
|
||
| If a snapshot has not been versioned yet, it's view name will be returned. | ||
|
tobymao marked this conversation as resolved.
Outdated
|
||
| """ | ||
| return { | ||
| name: snapshot.table_name | ||
| if snapshot.version | ||
| else snapshot.qualified_view_name.for_environment(c.PROD) | ||
| for name, snapshot in self.snapshots.items() | ||
| } | ||
|
|
||
| def render( | ||
| self, | ||
| model: t.Union[str, Model], | ||
| model_or_snapshot: MODEL_OR_SNAPSHOT, | ||
| *, | ||
| start: t.Optional[TimeLike] = None, | ||
| end: t.Optional[TimeLike] = None, | ||
|
|
@@ -317,7 +371,7 @@ def render( | |
| """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models. | ||
|
|
||
| Args: | ||
| model: The model name or instance to render. | ||
| model_or_snapshot: The model, model name, or snapshot to render. | ||
| start: The start of the interval to render. | ||
| end: The end of the interval to render. | ||
| latest: The latest time used for non incremental datasets. | ||
|
|
@@ -330,7 +384,14 @@ def render( | |
| The rendered expression. | ||
| """ | ||
| latest = latest or yesterday_ds() | ||
| model = model if isinstance(model, Model) else self.models[model] | ||
|
|
||
| if isinstance(model_or_snapshot, str): | ||
| model = self.models[model_or_snapshot] | ||
| elif isinstance(model_or_snapshot, Snapshot): | ||
| model = model_or_snapshot.model | ||
| else: | ||
| model = model_or_snapshot | ||
|
|
||
| expand = self.dag.upstream(model.name) if expand is True else expand or [] | ||
|
|
||
| return model.render_query( | ||
|
|
@@ -345,33 +406,43 @@ def render( | |
|
|
||
| def evaluate( | ||
| self, | ||
| snapshot: Snapshot | str, | ||
| model_or_snapshot: MODEL_OR_SNAPSHOT, | ||
| start: TimeLike, | ||
| end: TimeLike, | ||
| latest: TimeLike, | ||
| limit: t.Optional[int] = None, | ||
| **kwargs, | ||
| ) -> None: | ||
| """Evaluate a snapshot (running its query against a DB/Engine). | ||
| ) -> DF: | ||
| """Evaluate a model or snapshot (running its query against a DB/Engine). | ||
|
|
||
| This method is used to test or iterate on models without side effects. | ||
|
|
||
| Args: | ||
| snapshot: The snapshot to evaluate. | ||
| model_or_snapshot: The model, model name, or snapshot to render. | ||
| start: The start of the interval to evaluate. | ||
| end: The end of the interval to evaluate. | ||
| latest: The latest time used for non incremental datasets. | ||
| limit: A limit applied to the model, this must be > 0. | ||
| """ | ||
| if isinstance(snapshot, str): | ||
| snapshot = self.snapshots[snapshot] | ||
| if isinstance(model_or_snapshot, str): | ||
| snapshot = self.snapshots[model_or_snapshot] | ||
| elif isinstance(model_or_snapshot, Model): | ||
| snapshot = self.snapshots[model_or_snapshot.name] | ||
| else: | ||
| snapshot = model_or_snapshot | ||
|
|
||
| if not limit or limit <= 0: | ||
| limit = 1000 | ||
|
|
||
| self.snapshot_evaluator.evaluate( | ||
| return self.snapshot_evaluator.evaluate( | ||
| snapshot, | ||
| start, | ||
| end, | ||
| latest, | ||
| snapshots=self.snapshots, | ||
| mapping=self.mapping, | ||
| limit=limit, | ||
| ) | ||
|
|
||
| self.state_sync.add_interval(snapshot.snapshot_id, start, end) | ||
|
|
||
| def format(self) -> None: | ||
| """Format all models in a given directory.""" | ||
| for model in self.models.values(): | ||
|
|
@@ -680,7 +751,11 @@ def _load_models(self): | |
| new = registry.keys() - registered | ||
| registered |= new | ||
| for name in new: | ||
| model = registry[name].model(module, path) | ||
| model = registry[name].model( | ||
| module=module, | ||
| path=path, | ||
| time_column_format=self.config.time_column_format, | ||
| ) | ||
| self.models[model.name] = model | ||
| self._add_model_to_dag(model) | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.