-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Model Deprecation #7562
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Model Deprecation #7562
Changes from 10 commits
7cc1d74
55debbe
9ca7d37
fd88586
7842dca
37c8361
ccd73b0
8277e8c
0637591
d9a4cc2
ed443da
e64efb1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| kind: Features | ||
| body: Added warnings for model and ref deprecations | ||
| time: 2023-05-09T23:33:29.679333-04:00 | ||
| custom: | ||
| Author: peterallenwebb | ||
| Issue: "7433" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,6 +45,7 @@ class PublicModel(dbtClassMixin, ManifestOrPublicNode): | |
| # list of model unique_ids | ||
| public_node_dependencies: List[str] = field(default_factory=list) | ||
| generated_at: datetime = field(default_factory=datetime.utcnow) | ||
| deprecation_date: Optional[datetime] = None | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ✨ |
||
|
|
||
| @property | ||
| def is_latest_version(self) -> bool: | ||
|
|
||
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,7 @@ | ||
| from copy import deepcopy | ||
| from dataclasses import dataclass | ||
| from dataclasses import field | ||
| from datetime import datetime | ||
| import datetime | ||
| import os | ||
| import traceback | ||
| from typing import ( | ||
|
|
@@ -22,6 +22,7 @@ | |
| from dbt.events.base_types import EventLevel | ||
| import json | ||
| import pprint | ||
| import msgpack | ||
|
|
||
| import dbt.exceptions | ||
| import dbt.tracking | ||
|
|
@@ -51,6 +52,9 @@ | |
| StateCheckVarsHash, | ||
| Note, | ||
| PublicationArtifactChanged, | ||
| DeprecatedModel, | ||
| DeprecatedReference, | ||
| UpcomingReferenceDeprecation, | ||
| ) | ||
| from dbt.logger import DbtProcessState | ||
| from dbt.node_types import NodeType, AccessType | ||
|
|
@@ -131,6 +135,36 @@ | |
| PERF_INFO_FILE_NAME = "perf_info.json" | ||
|
|
||
|
|
||
| def extended_mashumaro_encoder(data): | ||
| return msgpack.packb(data, default=extended_msgpack_encoder, use_bin_type=True) | ||
|
|
||
|
|
||
| def extended_msgpack_encoder(obj): | ||
| if type(obj) is datetime.date: | ||
peterallenwebb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| date_bytes = msgpack.ExtType(1, obj.isoformat().encode()) | ||
| return date_bytes | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The msgpack docs outline an alternative method to encoding custom types by returning something more like: {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")}It looks to be interchangeable with |
||
| elif type(obj) is datetime.datetime: | ||
| datetime_bytes = msgpack.ExtType(2, obj.isoformat().encode()) | ||
| return datetime_bytes | ||
|
|
||
| return obj | ||
|
|
||
|
|
||
| def extended_mashumuro_decoder(data): | ||
| return msgpack.unpackb(data, ext_hook=extended_msgpack_decoder, raw=False) | ||
|
|
||
|
|
||
| def extended_msgpack_decoder(code, data): | ||
| if code == 1: | ||
| d = datetime.date.fromisoformat(data.decode()) | ||
| return d | ||
| elif code == 2: | ||
| dt = datetime.datetime.fromisoformat(data.decode()) | ||
| return dt | ||
| else: | ||
| return msgpack.ExtType(code, data) | ||
|
|
||
|
|
||
| class ReparseReason(StrEnum): | ||
| version_mismatch = "01_version_mismatch" | ||
| file_not_found = "02_file_not_found" | ||
|
|
@@ -511,8 +545,46 @@ def load(self): | |
| # write out the fully parsed manifest | ||
| self.write_manifest_for_partial_parse() | ||
|
|
||
| self.check_for_model_deprecations() | ||
|
|
||
| return self.manifest | ||
|
|
||
| def check_for_model_deprecations(self): | ||
| for node in self.manifest.nodes.values(): | ||
| if isinstance(node, ModelNode): | ||
| if ( | ||
| node.deprecation_date | ||
| and node.deprecation_date < datetime.datetime.now().astimezone() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @peterallenwebb want to double-check two things with you:
naive
|
||
| ): | ||
| fire_event( | ||
| DeprecatedModel( | ||
| model_name=node.name, | ||
| model_version=str(node.version), | ||
MichelleArk marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| deprecation_date=node.deprecation_date.isoformat(), | ||
| ) | ||
| ) | ||
|
|
||
| resolved_refs = self.manifest.resolve_refs(node, self.root_project.project_name) | ||
| resolved_model_refs = [r for r in resolved_refs if isinstance(r, ModelNode)] | ||
| for resolved_ref in resolved_model_refs: | ||
| if resolved_ref.deprecation_date: | ||
|
|
||
| if resolved_ref.deprecation_date < datetime.datetime.now().astimezone(): | ||
| event_cls = DeprecatedReference | ||
| else: | ||
| event_cls = UpcomingReferenceDeprecation | ||
|
|
||
| fire_event( | ||
| event_cls( | ||
| model_name=node.name, | ||
| ref_model_package=resolved_ref.package_name, | ||
| ref_model_name=resolved_ref.name, | ||
| ref_model_version=str(resolved_ref.version), | ||
| ref_model_latest_version=str(resolved_ref.latest_version), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto here and on the line above re: string conversion and error messages: https://github.com/dbt-labs/dbt-core/pull/7562/files#r1199287517 |
||
| ref_model_deprecation_date=resolved_ref.deprecation_date.isoformat(), | ||
| ) | ||
| ) | ||
|
|
||
| def load_and_parse_macros(self, project_parser_files): | ||
| for project in self.all_projects.values(): | ||
| if project.project_name not in project_parser_files: | ||
|
|
@@ -658,7 +730,7 @@ def write_manifest_for_partial_parse(self): | |
| UnableToPartialParse(reason="saved manifest contained the wrong version") | ||
| ) | ||
| self.manifest.metadata.dbt_version = __version__ | ||
| manifest_msgpack = self.manifest.to_msgpack() | ||
| manifest_msgpack = self.manifest.to_msgpack(extended_mashumaro_encoder) | ||
| make_directory(os.path.dirname(path)) | ||
| with open(path, "wb") as fp: | ||
| fp.write(manifest_msgpack) | ||
|
|
@@ -872,14 +944,14 @@ def read_manifest_for_partial_parse(self) -> Optional[Manifest]: | |
| try: | ||
| with open(path, "rb") as fp: | ||
| manifest_mp = fp.read() | ||
| manifest: Manifest = Manifest.from_msgpack(manifest_mp) # type: ignore | ||
| manifest: Manifest = Manifest.from_msgpack(manifest_mp, decoder=extended_mashumuro_decoder) # type: ignore | ||
| # keep this check inside the try/except in case something about | ||
| # the file has changed in weird ways, perhaps due to being a | ||
| # different version of dbt | ||
| is_partial_parsable, reparse_reason = self.is_partial_parsable(manifest) | ||
| if is_partial_parsable: | ||
| # We don't want to have stale generated_at dates | ||
| manifest.metadata.generated_at = datetime.utcnow() | ||
| manifest.metadata.generated_at = datetime.datetime.utcnow() | ||
peterallenwebb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # or invocation_ids | ||
| manifest.metadata.invocation_id = get_invocation_id() | ||
| return manifest | ||
|
|
@@ -1718,6 +1790,7 @@ def write_publication_artifact(root_project: RuntimeConfig, manifest: Manifest): | |
| latest_version=model.latest_version, | ||
| public_node_dependencies=list(public_node_dependencies), | ||
| generated_at=metadata.generated_at, | ||
| deprecation_date=model.deprecation_date, | ||
| ) | ||
| public_models[unique_id] = public_model | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Python docs here say the following:
Translating those into code gives something like this.
The slight change suggested below would cover the case where
tzinfois notNonebutdt.tzinfo.utcoffset(dt)returnsNone.To fully encode the Python datetime definition, I think we'd need to do something like this:
Granted, it might "never" happen for a
tzinfoobject frompytzto returnNonefor the offset, but I was able to hand construct such atzinfoobject locally.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd be happy to tighten up this check if you're willing to open the issue. Sorry we didn't get it in v1.