Merged
Changes from 10 commits
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230509-233329.yaml
@@ -0,0 +1,6 @@
kind: Features
body: Added warnings for model and ref deprecations
time: 2023-05-09T23:33:29.679333-04:00
custom:
Author: peterallenwebb
Issue: "7433"
7 changes: 4 additions & 3 deletions core/dbt/config/renderer.py
@@ -1,6 +1,7 @@
from typing import Dict, Any, Tuple, Optional, Union, Callable
import re
import os
from datetime import date

from dbt.clients.jinja import get_rendered, catch_jinja
from dbt.constants import SECRET_ENV_PREFIX
@@ -33,10 +34,10 @@ def render_entry(self, value: Any, keypath: Keypath) -> Any:
return self.render_value(value, keypath)

def render_value(self, value: Any, keypath: Optional[Keypath] = None) -> Any:
# keypath is ignored.
# if it wasn't read as a string, ignore it
# keypath is ignored (and someone who knows should explain why here)
if not isinstance(value, str):
return value
return value if not isinstance(value, date) else value.isoformat()

try:
with catch_jinja():
return get_rendered(value, self.context, native=True)
17 changes: 17 additions & 0 deletions core/dbt/contracts/graph/manifest.py
@@ -965,6 +965,23 @@ def analysis_lookup(self) -> AnalysisLookup:
self._analysis_lookup = AnalysisLookup(self)
return self._analysis_lookup

def resolve_refs(
self, source_node: GraphMemberNode, current_project: str
) -> List[MaybeNonSource]:
resolved_refs: List[MaybeNonSource] = []
for ref in source_node.refs:
resolved = self.resolve_ref(
source_node,
ref.name,
ref.package,
ref.version,
current_project,
source_node.package_name,
)
resolved_refs.append(resolved)

return resolved_refs

# Called by dbt.parser.manifest._process_refs_for_exposure, _process_refs_for_metric,
# and dbt.parser.manifest._process_refs_for_node
def resolve_ref(
3 changes: 3 additions & 0 deletions core/dbt/contracts/graph/nodes.py
@@ -1,4 +1,5 @@
import os
from datetime import datetime
import time
from dataclasses import dataclass, field
from enum import Enum
@@ -568,6 +569,7 @@ class ModelNode(CompiledNode):
constraints: List[ModelLevelConstraint] = field(default_factory=list)
version: Optional[NodeVersion] = None
latest_version: Optional[NodeVersion] = None
deprecation_date: Optional[datetime] = None
state_relation: Optional[StateRelation] = None

@property
@@ -1415,6 +1417,7 @@ class ParsedNodePatch(ParsedPatch):
version: Optional[NodeVersion]
latest_version: Optional[NodeVersion]
constraints: List[Dict[str, Any]]
deprecation_date: Optional[datetime]


@dataclass
22 changes: 22 additions & 0 deletions core/dbt/contracts/graph/unparsed.py
@@ -1,3 +1,4 @@
import datetime
import re

from dbt import deprecations
@@ -154,6 +155,7 @@ class UnparsedVersion(dbtClassMixin):
columns: Sequence[Union[dbt.helper_types.IncludeExclude, UnparsedColumn]] = field(
default_factory=list
)
deprecation_date: Optional[datetime.datetime] = None

def __lt__(self, other):
try:
@@ -192,6 +194,8 @@ def __post_init__(self):
else:
self._unparsed_columns.append(column)

self.deprecation_date = normalize_date(self.deprecation_date)


@dataclass
class UnparsedAnalysisUpdate(HasConfig, HasColumnDocs, HasColumnProps, HasYamlMetadata):
@@ -210,6 +214,7 @@ class UnparsedModelUpdate(UnparsedNodeUpdate):
access: Optional[str] = None
latest_version: Optional[NodeVersion] = None
versions: Sequence[UnparsedVersion] = field(default_factory=list)
deprecation_date: Optional[datetime.datetime] = None

def __post_init__(self):
if self.latest_version:
@@ -229,6 +234,8 @@ def __post_init__(self):

self._version_map = {version.v: version for version in self.versions}

self.deprecation_date = normalize_date(self.deprecation_date)

def get_columns_for_version(self, version: NodeVersion) -> List[UnparsedColumn]:
if version not in self._version_map:
raise DbtInternalError(
@@ -652,3 +659,18 @@ def validate(cls, data):
super(UnparsedGroup, cls).validate(data)
if data["owner"].get("name") is None and data["owner"].get("email") is None:
raise ValidationError("Group owner must have at least one of 'name' or 'email'.")


def normalize_date(d: Optional[datetime.date]) -> Optional[datetime.datetime]:
"""Convert date to datetime (at midnight), and add local time zone if naive"""
if d is None:
return None

# convert date to datetime
dt = d if type(d) == datetime.datetime else datetime.datetime(d.year, d.month, d.day)

if not dt.tzinfo:
Contributor:

The Python docs here say the following:

[screenshot: the Python docs' definition of aware vs. naive datetime objects]

Translating those into code gives something like this.

The slight change suggested below would cover the case where tzinfo is not None but dt.tzinfo.utcoffset(dt) returns None.

To fully encode the Python datetime definition, I think we'd need to do something like this:

Suggested change
if not dt.tzinfo:
if not dt.tzinfo or not dt.tzinfo.utcoffset(dt):

Granted, it might "never" happen that a tzinfo object from pytz returns None for the offset, but I was able to hand-construct such a tzinfo object locally.

Contributor Author:

I'd be happy to tighten up this check if you're willing to open the issue. Sorry we didn't get it in v1.

# date is naive, re-interpret as system time zone
dt = dt.astimezone()

return dt
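
To make the aware-versus-naive rule above concrete, here is a minimal standalone sketch of the stricter check the reviewer suggests (standard library only; the is_aware helper is illustrative and not dbt code):

import datetime

def is_aware(dt: datetime.datetime) -> bool:
    # Per the Python docs, a datetime is aware only if tzinfo is set AND
    # utcoffset() returns a non-None value; otherwise it is naive.
    return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None

naive = datetime.datetime(2023, 5, 17)
aware = naive.astimezone()  # attaches the system time zone, as normalize_date does
assert not is_aware(naive)
assert is_aware(aware)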
1 change: 1 addition & 0 deletions core/dbt/contracts/publication.py
@@ -45,6 +45,7 @@ class PublicModel(dbtClassMixin, ManifestOrPublicNode):
# list of model unique_ids
public_node_dependencies: List[str] = field(default_factory=list)
generated_at: datetime = field(default_factory=datetime.utcnow)
deprecation_date: Optional[datetime] = None

@property
def is_latest_version(self) -> bool:
43 changes: 43 additions & 0 deletions core/dbt/events/types.proto
@@ -1174,6 +1174,49 @@ message UnpinnedRefNewVersionAvailableMsg {
UnpinnedRefNewVersionAvailable data = 2;
}

// I065
message DeprecatedModel {
string model_name = 1;
string model_version = 2;
string deprecation_date = 3;
}

message DeprecatedModelMsg {
EventInfo info = 1;
DeprecatedModel data = 2;
}

// I066
message UpcomingReferenceDeprecation {
string model_name = 1;
string ref_model_package = 2;
string ref_model_name = 3;
string ref_model_version = 4;
string ref_model_latest_version = 5;
string ref_model_deprecation_date = 6;
}

message UpcomingReferenceDeprecationMsg {
EventInfo info = 1;
UpcomingReferenceDeprecation data = 2;
}

// I067
message DeprecatedReference {
string model_name = 1;
string ref_model_package = 2;
string ref_model_name = 3;
string ref_model_version = 4;
string ref_model_latest_version = 5;
string ref_model_deprecation_date = 6;
}

message DeprecatedReferenceMsg {
EventInfo info = 1;
DeprecatedReference data = 2;
}


// M - Deps generation

// M001
56 changes: 56 additions & 0 deletions core/dbt/events/types.py
@@ -1146,6 +1146,62 @@ def message(self) -> str:
return msg


class DeprecatedModel(WarnLevel):
def code(self):
return "I065"

def message(self) -> str:
version = ".v" + self.model_version if self.model_version else ""
return (
f"Model {self.model_name}{version} has passed its deprecation date of {self.deprecation_date}."
"This model should be disabled or removed."
)


class UpcomingReferenceDeprecation(WarnLevel):
def code(self):
return "I066"

def message(self) -> str:
ref_model_version = ".v" + self.ref_model_version if self.ref_model_version else ""
msg = (
f"While compiling '{self.model_name}': Found a reference to {self.ref_model_name}{ref_model_version}, "
f"which is slated for deprecation on '{self.ref_model_deprecation_date}'. "
)

if self.ref_model_version and self.ref_model_version != self.ref_model_latest_version:
coda = (
f"A new version of '{self.ref_model_name}' is available. Try it out: "
f"{{{{ ref('{self.ref_model_package}', '{self.ref_model_name}', "
f"v='{self.ref_model_latest_version}') }}}}."
)
msg = msg + coda

return msg


class DeprecatedReference(WarnLevel):
def code(self):
return "I067"

def message(self) -> str:
ref_model_version = ".v" + self.ref_model_version if self.ref_model_version else ""
msg = (
f"While compiling '{self.model_name}': Found a reference to {self.ref_model_name}{ref_model_version}, "
f"which was deprecated on '{self.ref_model_deprecation_date}'. "
)

if self.ref_model_version and self.ref_model_version != self.ref_model_latest_version:
coda = (
f"A new version of '{self.ref_model_name}' is available. Migrate now: "
f"{{{{ ref('{self.ref_model_package}', '{self.ref_model_name}', "
f"v='{self.ref_model_latest_version}') }}}}."
)
msg = msg + coda

return msg


# =======================================================
# M - Deps generation
# =======================================================
878 changes: 445 additions & 433 deletions core/dbt/events/types_pb2.py

Large diffs are not rendered by default.

81 changes: 77 additions & 4 deletions core/dbt/parser/manifest.py
@@ -1,7 +1,7 @@
from copy import deepcopy
from dataclasses import dataclass
from dataclasses import field
from datetime import datetime
import datetime
import os
import traceback
from typing import (
@@ -22,6 +22,7 @@
from dbt.events.base_types import EventLevel
import json
import pprint
import msgpack

import dbt.exceptions
import dbt.tracking
@@ -51,6 +52,9 @@
StateCheckVarsHash,
Note,
PublicationArtifactChanged,
DeprecatedModel,
DeprecatedReference,
UpcomingReferenceDeprecation,
)
from dbt.logger import DbtProcessState
from dbt.node_types import NodeType, AccessType
@@ -131,6 +135,36 @@
PERF_INFO_FILE_NAME = "perf_info.json"


def extended_mashumaro_encoder(data):
return msgpack.packb(data, default=extended_msgpack_encoder, use_bin_type=True)


def extended_msgpack_encoder(obj):
if type(obj) is datetime.date:
date_bytes = msgpack.ExtType(1, obj.isoformat().encode())
return date_bytes
Contributor:

The msgpack docs outline an alternative method for encoding custom types by returning something more like:

{'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")}

It looks to be interchangeable with the ExtType usage you've implemented. The ExtType approach feels more readable and more extensible to custom data types beyond datetime.

elif type(obj) is datetime.datetime:
datetime_bytes = msgpack.ExtType(2, obj.isoformat().encode())
return datetime_bytes

return obj


def extended_mashumuro_decoder(data):
return msgpack.unpackb(data, ext_hook=extended_msgpack_decoder, raw=False)


def extended_msgpack_decoder(code, data):
if code == 1:
d = datetime.date.fromisoformat(data.decode())
return d
elif code == 2:
dt = datetime.datetime.fromisoformat(data.decode())
return dt
else:
return msgpack.ExtType(code, data)
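
For comparison, a rough sketch of the dict-based alternative from the msgpack docs that the reviewer mentions above; the key names and format string follow that upstream example and none of it is dbt code:

import datetime
import msgpack

def encode_datetime(obj):
    # Tag datetimes with a marker key instead of an ExtType code
    if isinstance(obj, datetime.datetime):
        return {"__datetime__": True, "as_str": obj.strftime("%Y%m%dT%H:%M:%S.%f")}
    return obj

def decode_datetime(obj):
    # Reverse the tagging on unpack
    if "__datetime__" in obj:
        return datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f")
    return obj

packed = msgpack.packb(datetime.datetime(2023, 5, 17), default=encode_datetime, use_bin_type=True)
unpacked = msgpack.unpackb(packed, object_hook=decode_datetime, raw=False)
assert unpacked == datetime.datetime(2023, 5, 17)

Both approaches round-trip through packb/unpackb; the ExtType hooks above keep the type tag out of the payload itself.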


class ReparseReason(StrEnum):
version_mismatch = "01_version_mismatch"
file_not_found = "02_file_not_found"
@@ -511,8 +545,46 @@ def load(self):
# write out the fully parsed manifest
self.write_manifest_for_partial_parse()

self.check_for_model_deprecations()

return self.manifest

def check_for_model_deprecations(self):
for node in self.manifest.nodes.values():
if isinstance(node, ModelNode):
if (
node.deprecation_date
and node.deprecation_date < datetime.datetime.now().astimezone()
Contributor:

@peterallenwebb want to double-check two things with you:

  1. naive deprecation_dates
  2. single stable time

naive deprecation_dates

I believe this would depend on the system time zone wherever the Python process is being executed. So we'll probably want to double-check that any deprecation_date expressed without an offset, like 2023-05-17, is understood to take its offset from the system time zone.

single stable time

Were you able to try out something akin to run_started_at?

It might be useful for getting a single stable value over the course of the entire execution. It might also be useful from a testing perspective.

Contributor Author:

@dbeatty10 Yes, your understanding is correct, and the function you commented on elsewhere ensures that any date or datetime without an explicit timezone is re-interpreted as being in the system timezone and made non-naive.

I wasn't able to figure out a convenient way of accessing run_started_at in the context where this is evaluated, but it is worth following up on when we have some time, IMO. I agree with your point about testing, especially, which often overlooked even by experienced engineers.

):
fire_event(
DeprecatedModel(
model_name=node.name,
model_version=str(node.version),
deprecation_date=node.deprecation_date.isoformat(),
)
)

resolved_refs = self.manifest.resolve_refs(node, self.root_project.project_name)
resolved_model_refs = [r for r in resolved_refs if isinstance(r, ModelNode)]
for resolved_ref in resolved_model_refs:
if resolved_ref.deprecation_date:

if resolved_ref.deprecation_date < datetime.datetime.now().astimezone():
event_cls = DeprecatedReference
else:
event_cls = UpcomingReferenceDeprecation

fire_event(
event_cls(
model_name=node.name,
ref_model_package=resolved_ref.package_name,
ref_model_name=resolved_ref.name,
ref_model_version=str(resolved_ref.version),
ref_model_latest_version=str(resolved_ref.latest_version),
Contributor:

ditto here and on the line above re: string conversion and error messages: https://github.com/dbt-labs/dbt-core/pull/7562/files#r1199287517

ref_model_deprecation_date=resolved_ref.deprecation_date.isoformat(),
)
)
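
To illustrate the time-zone behavior discussed in the thread above with a hypothetical date, a deprecation_date written as a bare 2023-05-17 normalizes to midnight in the system time zone and is then compared against an aware "now":

import datetime

# what normalize_date() yields for a bare 2023-05-17 in schema YAML
deprecation_date = datetime.datetime(2023, 5, 17).astimezone()

now = datetime.datetime.now().astimezone()
is_deprecated = deprecation_date < now  # mirrors the check in check_for_model_deprecations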

def load_and_parse_macros(self, project_parser_files):
for project in self.all_projects.values():
if project.project_name not in project_parser_files:
@@ -658,7 +730,7 @@ def write_manifest_for_partial_parse(self):
UnableToPartialParse(reason="saved manifest contained the wrong version")
)
self.manifest.metadata.dbt_version = __version__
manifest_msgpack = self.manifest.to_msgpack()
manifest_msgpack = self.manifest.to_msgpack(extended_mashumaro_encoder)
make_directory(os.path.dirname(path))
with open(path, "wb") as fp:
fp.write(manifest_msgpack)
@@ -872,14 +944,14 @@ def read_manifest_for_partial_parse(self) -> Optional[Manifest]:
try:
with open(path, "rb") as fp:
manifest_mp = fp.read()
manifest: Manifest = Manifest.from_msgpack(manifest_mp) # type: ignore
manifest: Manifest = Manifest.from_msgpack(manifest_mp, decoder=extended_mashumuro_decoder) # type: ignore
# keep this check inside the try/except in case something about
# the file has changed in weird ways, perhaps due to being a
# different version of dbt
is_partial_parsable, reparse_reason = self.is_partial_parsable(manifest)
if is_partial_parsable:
# We don't want to have stale generated_at dates
manifest.metadata.generated_at = datetime.utcnow()
manifest.metadata.generated_at = datetime.datetime.utcnow()
# or invocation_ids
manifest.metadata.invocation_id = get_invocation_id()
return manifest
@@ -1718,6 +1790,7 @@ def write_publication_artifact(root_project: RuntimeConfig, manifest: Manifest):
latest_version=model.latest_version,
public_node_dependencies=list(public_node_dependencies),
generated_at=metadata.generated_at,
deprecation_date=model.deprecation_date,
)
public_models[unique_id] = public_model
