From 40392dd3d26decfcf94cc08dc73505718091b901 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 4 Jun 2021 16:18:35 -0700 Subject: [PATCH 1/2] fix(ingest): support mssql encryption --- metadata-ingestion/README.md | 34 +++++++++++++++++++ metadata-ingestion/setup.py | 1 + .../src/datahub/ingestion/source/mssql.py | 28 +++++++++++++++ .../integration/sql_server/mssql_to_file.yml | 4 ++- 4 files changed, 66 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 79f0a9f4af587a..a7561249fe94cb 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -190,6 +190,8 @@ source: ### Microsoft SQL Server Metadata `mssql` +We have two options for the underlying library used to connect to SQL Server: (1) [python-tds](https://github.com/denisenkom/pytds) and (2) [pyodbc](https://github.com/mkleehammer/pyodbc). The TDS library is pure Python and hence easier to install, but only PyODBC supports encrypted connections. + Extracts: - List of databases, schema, and tables @@ -216,8 +218,40 @@ source: # documentation will be a good reference for what is supported. To find which dialect is likely # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. charset: "utf8" + # If set to true, we'll use the pyodbc library. This requires you to have + # already installed the Microsoft ODBC Driver for SQL Server. + # See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15 + use_odbc: False + uri_args: {} +``` + +
+ Example: using ingestion with ODBC and encryption + +This requires you to have already installed the Microsoft ODBC Driver for SQL Server. +See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15 + +```yml +source: + type: mssql + config: + # See https://docs.sqlalchemy.org/en/14/dialects/mssql.html#module-sqlalchemy.dialects.mssql.pyodbc + use_odbc: True + username: user + password: pass + host_port: localhost:1433 + database: DemoDatabase + uri_args: + # See https://docs.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver15 + driver: "ODBC Driver 17 for SQL Server" + Encrypt: "yes" + TrustServerCertificate: "Yes" + ssl: "True" + # Trusted_Connection: "yes" ``` +
+ ### Hive `hive` Extracts: diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 53bfc8507f8a3e..df6813de8cc5e7 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -77,6 +77,7 @@ def get_long_description(): "acryl-pyhive[hive]>=0.6.6" }, "mssql": sql_common | {"sqlalchemy-pytds>=0.3"}, + "mssql-odbc": sql_common | {"pyodbc"}, "mysql": sql_common | {"pymysql>=1.0.2"}, "postgres": sql_common | {"psycopg2-binary", "GeoAlchemy2"}, "redshift": sql_common | {"sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"}, diff --git a/metadata-ingestion/src/datahub/ingestion/source/mssql.py b/metadata-ingestion/src/datahub/ingestion/source/mssql.py index d257806fa2b812..fe115e7b1e6b22 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mssql.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mssql.py @@ -1,3 +1,8 @@ +import urllib.parse +from typing import Dict + +import pydantic + # This import verifies that the dependencies are available. import sqlalchemy_pytds # noqa: F401 @@ -9,6 +14,29 @@ class SQLServerConfig(BasicSQLAlchemyConfig): host_port = "localhost:1433" scheme = "mssql+pytds" + use_odbc: bool = False + uri_args: Dict[str, str] + + @pydantic.validator("uri_args") + def passwords_match(cls, v, values, **kwargs): + if values["use_odbc"] and "driver" not in v: + raise ValueError("uri_args must contain a 'driver' option") + elif not values["use_odbc"] and v: + raise ValueError("uri_args is not supported when ODBC is disabled") + return v + + def get_sql_alchemy_url(self): + if self.use_odbc: + # Ensure that the import is available. + import pyodbc # noqa: F401 + + self.scheme = "mssql+pyodbc" + uri = super().get_sql_alchemy_url() + + if self.use_odbc: + uri = f"{uri}?{urllib.parse.urlencode(self.uri_args)}" + return uri + def get_identifier(self, schema: str, table: str) -> str: regular = f"{schema}.{table}" if self.database: diff --git a/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml b/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml index 78a3f8f782b354..97de1f8297ac87 100644 --- a/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml +++ b/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml @@ -7,8 +7,10 @@ source: password: test!Password database: DemoData host_port: localhost:51433 + use_odbc: False + uri_args: {} sink: type: file config: - filename: './mssql_mces.json' + filename: "./mssql_mces.json" From fb0ce85dbf5b5b55bb24db8878b4da75c3e2bfc1 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 4 Jun 2021 17:29:09 -0700 Subject: [PATCH 2/2] update tests --- metadata-ingestion/src/datahub/ingestion/source/mssql.py | 2 +- .../tests/integration/sql_server/mssql_to_file.yml | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mssql.py b/metadata-ingestion/src/datahub/ingestion/source/mssql.py index fe115e7b1e6b22..8f7cb8a8edffe4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mssql.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mssql.py @@ -15,7 +15,7 @@ class SQLServerConfig(BasicSQLAlchemyConfig): scheme = "mssql+pytds" use_odbc: bool = False - uri_args: Dict[str, str] + uri_args: Dict[str, str] = {} @pydantic.validator("uri_args") def passwords_match(cls, v, values, **kwargs): diff --git a/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml b/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml index 97de1f8297ac87..c53e3cf6b80452 100644 --- a/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml +++ b/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml @@ -7,8 +7,9 @@ source: password: test!Password database: DemoData host_port: localhost:51433 - use_odbc: False - uri_args: {} + # use_odbc: True + # uri_args: + # driver: "ODBC Driver 17 for SQL Server" sink: type: file