diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 79f0a9f4af587a..a7561249fe94cb 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -190,6 +190,8 @@ source: ### Microsoft SQL Server Metadata `mssql` +We have two options for the underlying library used to connect to SQL Server: (1) [python-tds](https://github.com/denisenkom/pytds) and (2) [pyodbc](https://github.com/mkleehammer/pyodbc). The TDS library is pure Python and hence easier to install, but only PyODBC supports encrypted connections. + Extracts: - List of databases, schema, and tables @@ -216,8 +218,40 @@ source: # documentation will be a good reference for what is supported. To find which dialect is likely # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. charset: "utf8" + # If set to true, we'll use the pyodbc library. This requires you to have + # already installed the Microsoft ODBC Driver for SQL Server. + # See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15 + use_odbc: False + uri_args: {} +``` + +
+ Example: using ingestion with ODBC and encryption + +This requires you to have already installed the Microsoft ODBC Driver for SQL Server. +See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15 + +```yml +source: + type: mssql + config: + # See https://docs.sqlalchemy.org/en/14/dialects/mssql.html#module-sqlalchemy.dialects.mssql.pyodbc + use_odbc: True + username: user + password: pass + host_port: localhost:1433 + database: DemoDatabase + uri_args: + # See https://docs.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver15 + driver: "ODBC Driver 17 for SQL Server" + Encrypt: "yes" + TrustServerCertificate: "Yes" + ssl: "True" + # Trusted_Connection: "yes" ``` +
+
 ### Hive `hive`
 
 Extracts:
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 53bfc8507f8a3e..df6813de8cc5e7 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -77,6 +77,7 @@ def get_long_description():
         "acryl-pyhive[hive]>=0.6.6"
     },
     "mssql": sql_common | {"sqlalchemy-pytds>=0.3"},
+    "mssql-odbc": sql_common | {"pyodbc"},
     "mysql": sql_common | {"pymysql>=1.0.2"},
     "postgres": sql_common | {"psycopg2-binary", "GeoAlchemy2"},
     "redshift": sql_common | {"sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"},
diff --git a/metadata-ingestion/src/datahub/ingestion/source/mssql.py b/metadata-ingestion/src/datahub/ingestion/source/mssql.py
index d257806fa2b812..8f7cb8a8edffe4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/mssql.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/mssql.py
@@ -1,3 +1,8 @@
+import urllib.parse
+from typing import Dict
+
+import pydantic
+
 # This import verifies that the dependencies are available.
 import sqlalchemy_pytds  # noqa: F401
 
@@ -9,6 +14,44 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
     host_port = "localhost:1433"
     scheme = "mssql+pytds"
 
+    # When true, connect via pyodbc instead of the default python-tds driver.
+    use_odbc: bool = False
+    # Query-string arguments appended to the connection URL when use_odbc is set.
+    uri_args: Dict[str, str] = {}
+
+    # always=True makes pydantic run this check even when uri_args is omitted,
+    # so `use_odbc: True` without a driver is rejected instead of slipping through.
+    # NOTE(review): the method name does not describe the check; it is kept
+    # unchanged so the class's attribute names stay the same.
+    @pydantic.validator("uri_args", always=True)
+    def passwords_match(cls, v, values, **kwargs):
+        """Check that uri_args is consistent with use_odbc.
+
+        Raises ValueError if ODBC is enabled but no 'driver' is given, or if
+        uri_args is non-empty while ODBC is disabled.
+        """
+        if "use_odbc" not in values:
+            # pydantic omits fields that failed validation from `values`; the
+            # use_odbc error is already reported, so avoid a KeyError here.
+            return v
+        if values["use_odbc"] and "driver" not in v:
+            raise ValueError("uri_args must contain a 'driver' option")
+        elif not values["use_odbc"] and v:
+            raise ValueError("uri_args is not supported when ODBC is disabled")
+        return v
+
+    def get_sql_alchemy_url(self):
+        """Return the SQLAlchemy URL; with use_odbc, use pyodbc and append uri_args."""
+        if not self.use_odbc:
+            return super().get_sql_alchemy_url()
+
+        # Ensure that the import is available.
+        import pyodbc  # noqa: F401
+
+        self.scheme = "mssql+pyodbc"
+        uri = super().get_sql_alchemy_url()
+        return f"{uri}?{urllib.parse.urlencode(self.uri_args)}"
+
     def get_identifier(self, schema: str, table: str) -> str:
         regular = f"{schema}.{table}"
         if self.database:
diff --git a/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml b/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml
index 78a3f8f782b354..c53e3cf6b80452 100644
--- a/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml
+++ b/metadata-ingestion/tests/integration/sql_server/mssql_to_file.yml
@@ -7,8 +7,11 @@ source:
     password: test!Password
     database: DemoData
     host_port: localhost:51433
+    # use_odbc: True
+    # uri_args:
+    #   driver: "ODBC Driver 17 for SQL Server"
 
 sink:
   type: file
   config:
-    filename: './mssql_mces.json'
+    filename: "./mssql_mces.json"