diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index f6a6b21f995c6..893171197f8bb 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -98,9 +98,27 @@ def get_long_description(): "mssql-odbc", } +mypy_stubs = { + # for Python 3.6 support + "dataclasses", + "types-dataclasses", + "sqlalchemy-stubs", + "types-pkg_resources", + "types-six", + "types-python-dateutil", + "types-requests", + "types-toml", + "types-PyMySQL", + "types-PyYAML", + "types-freezegun", + # versions 0.1.13 and 0.1.14 seem to have issues + "types-click==0.1.12", +} + base_dev_requirements = { *base_requirements, *framework_common, + *mypy_stubs, "black>=19.10b0", "coverage>=5.1", "flake8>=3.8.3", @@ -110,7 +128,6 @@ def get_long_description(): "pytest-cov>=2.8.1", "pytest-docker", "tox", - "sqlalchemy-stubs", "deepdiff", "requests-mock", "freezegun", @@ -126,6 +143,7 @@ def get_long_description(): "ldap", "looker", "glue", + "hive", "datahub-kafka", "datahub-rest", # airflow is added below diff --git a/metadata-ingestion/src/datahub/ingestion/source/hive.py b/metadata-ingestion/src/datahub/ingestion/source/hive.py index 7bc06bc64659f..dc5f2a149d852 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/hive.py @@ -22,6 +22,12 @@ class HiveConfig(BasicSQLAlchemyConfig): # defaults scheme = "hive" + def get_identifier(self, schema: str, table: str) -> str: + regular = f"{schema}.{table}" + if self.database: + return f"{self.database}.{regular}" + return regular + class HiveSource(SQLAlchemySource): def __init__(self, config, ctx): diff --git a/metadata-ingestion/tests/unit/test_hive_source.py b/metadata-ingestion/tests/unit/test_hive_source.py new file mode 100644 index 0000000000000..f3f08444f6a31 --- /dev/null +++ b/metadata-ingestion/tests/unit/test_hive_source.py @@ -0,0 +1,23 @@ +import unittest + +from datahub.ingestion.source.hive import HiveConfig + + +class HiveSinkTest(unittest.TestCase): + def test_hive_configuration_get_indentifier_with_database(self): + test_db_name = "test_database" + test_schema_name = "test_schema" + test_table_name = "test_table" + config_dict = { + "username": "test", + "password": "test", + "host_port": "test:80", + "database": test_db_name, + "scheme": "hive+https", + } + hive_config = HiveConfig.parse_obj(config_dict) + expected_output = f"{test_db_name}.{test_schema_name}.{test_table_name}" + output = hive_config.get_identifier( + schema=test_schema_name, table=test_table_name + ) + assert output == expected_output