Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def get_long_description():
| {
# Acryl Data maintains a fork of PyHive, which adds support for table comments
# and column comments, and also releases HTTP and HTTPS transport schemes.
"acryl-pyhive[hive]>=0.6.7"
"acryl-pyhive[hive]>=0.6.9"
},
"ldap": {"python-ldap>=2.4"},
"looker": {"looker-sdk==21.6.0"},
Expand Down
5 changes: 5 additions & 0 deletions metadata-ingestion/src/datahub/ingestion/source/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ class HiveConfig(BasicSQLAlchemyConfig):
# defaults
scheme = "hive"

# The Hive SQLAlchemy connector returns views as tables.
# See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273.
# Disabling views helps us prevent this duplication.
include_views = False

def get_identifier(self, schema: str, table: str) -> str:
regular = f"{schema}.{table}"
if self.database:
Expand Down
20 changes: 14 additions & 6 deletions metadata-ingestion/src/datahub/ingestion/source/sql_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type

from sqlalchemy import create_engine, inspect
from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.sql import sqltypes as types

from datahub.configuration.common import AllowDenyPattern, ConfigModel
Expand Down Expand Up @@ -240,7 +241,7 @@ def get_workunits(self) -> Iterable[SqlWorkUnit]:

def loop_tables(
self,
inspector: Any,
inspector: Inspector,
schema: str,
sql_config: SQLAlchemyConfig,
) -> Iterable[SqlWorkUnit]:
Expand All @@ -255,7 +256,9 @@ def loop_tables(

columns = inspector.get_columns(table, schema)
try:
table_info: dict = inspector.get_table_comment(table, schema)
# SQLAlchemy stubs are incomplete and missing this method.
# PR: https://github.com/dropbox/sqlalchemy-stubs/pull/223.
table_info: dict = inspector.get_table_comment(table, schema) # type: ignore
except NotImplementedError:
description: Optional[str] = None
properties: Dict[str, str] = {}
Expand Down Expand Up @@ -291,7 +294,7 @@ def loop_tables(

def loop_views(
self,
inspector: Any,
inspector: Inspector,
schema: str,
sql_config: SQLAlchemyConfig,
) -> Iterable[SqlWorkUnit]:
Expand All @@ -306,7 +309,9 @@ def loop_views(

columns = inspector.get_columns(view, schema)
try:
view_info: dict = inspector.get_table_comment(view, schema)
# SQLAlchemy stubs are incomplete and missing this method.
# PR: https://github.com/dropbox/sqlalchemy-stubs/pull/223.
view_info: dict = inspector.get_table_comment(view, schema) # type: ignore
except NotImplementedError:
description: Optional[str] = None
properties: Dict[str, str] = {}
Expand All @@ -316,8 +321,11 @@ def loop_views(
# The "properties" field is a non-standard addition to SQLAlchemy's interface.
properties = view_info.get("properties", {})

view_definition = inspector.get_view_definition(view)
if view_definition is None:
try:
view_definition = inspector.get_view_definition(view, schema)
if view_definition is None:
view_definition = ""
except NotImplementedError:
view_definition = ""
properties["view_definition"] = view_definition
properties["is_view"] = "True"
Expand Down