Commit 9fb07b5

redshift S3 working
1 parent ddd77e1 commit 9fb07b5

File tree

9 files changed: 306 additions, 155 deletions


setup.py

Lines changed: 4 additions & 3 deletions
@@ -3,9 +3,9 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='target-postgres',
+    name='target-sql',
     version="0.0.1",
-    description='Singer.io target for loading data into postgres',
+    description='Singer.io targets for loading data into SQL databases',
     classifiers=['Programming Language :: Python :: 3 :: Only'],
     py_modules=['target_postgres'],
     install_requires=[
@@ -16,7 +16,8 @@
     ],
     entry_points='''
         [console_scripts]
-        target-postgres=target_postgres:main
+        target-postgres=target_sql:target_postgres_main
+        target-redshift=target_sql:target_redshift_main
     ''',
     packages=find_packages()
 )
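
After this change, installing the package registers two console scripts that both dispatch into the shared target_sql module. A minimal usage sketch; the tap command and config filename are illustrative, not part of this commit:

    pip install .
    some-singer-tap | target-postgres --config config.json
    some-singer-tap | target-redshift --config config.json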

target_postgres/__init__.py renamed to target_sql/__init__.py

Lines changed: 30 additions & 36 deletions
@@ -9,13 +9,14 @@
 from singer import utils, metadata, metrics
 import psycopg2
 
-from target_postgres.postgres import PostgresTarget
-from target_postgres.singer_stream import BufferedSingerStream
+from target_sql.target_postgres import TargetPostgres
+from target_sql.target_redshift import TargetRedshift
+from target_sql.singer_stream import BufferedSingerStream
 
 LOGGER = singer.get_logger()
 
 REQUIRED_CONFIG_KEYS = [
-    'postgres_database'
+    'target_connection'
 ]
 
 def flush_stream(target, stream_buffer):
@@ -87,47 +88,40 @@ def line_handler(streams, target, max_batch_rows, max_batch_size, line):
             line_data['type'],
             line))
 
-def main(config, input_stream=None):
+def target_sql(target_class, config, input_stream=None):
     try:
-        connection = psycopg2.connect(
-            host=config.get('postgres_host', 'localhost'),
-            port=config.get('postgres_port', 5432),
-            dbname=config.get('postgres_database'),
-            user=config.get('postgres_username'),
-            password=config.get('postgres_password'))
-
-        streams = {}
-        postgres_target = PostgresTarget(
-            connection,
-            LOGGER,
-            postgres_schema=config.get('postgres_schema', 'public'))
-
-        max_batch_rows = config.get('max_batch_rows')
-        max_batch_size = config.get('max_batch_size')
-        batch_detection_threshold = config.get('batch_detection_threshold', 5000)
-
-        if not input_stream:
-            input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
-
-        line_count = 0
-        for line in input_stream:
-            line_handler(streams, postgres_target, max_batch_rows, max_batch_size, line)
-            if line_count > 0 and line_count % batch_detection_threshold == 0:
-                flush_streams(streams, postgres_target)
-            line_count += 1
-
-        flush_streams(streams, postgres_target, force=True)
-
-        connection.close()
+        with target_class(config, LOGGER) as target:
+            max_batch_rows = config.get('max_batch_rows')
+            max_batch_size = config.get('max_batch_size')
+            batch_detection_threshold = config.get('batch_detection_threshold', 5000)
+
+            if not input_stream:
+                input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
+
+            line_count = 0
+            streams = {}
+            for line in input_stream:
+                line_handler(streams, target, max_batch_rows, max_batch_size, line)
+                if line_count > 0 and line_count % batch_detection_threshold == 0:
+                    flush_streams(streams, target)
+                line_count += 1
+
+            flush_streams(streams, target, force=True)
     except Exception as e:
         LOGGER.critical(e)
         raise e
 
-if __name__ == "__main__":
+def main(target_class):
     try:
         args = utils.parse_args(REQUIRED_CONFIG_KEYS)
 
-        main(args.config)
+        target_sql(target_class, args.config)
     except Exception as e:
         LOGGER.critical(e)
         raise e
+
+def target_postgres_main():
+    main(TargetPostgres)
+
+def target_redshift_main():
+    main(TargetRedshift)
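
The required config key changes from postgres_database to a nested target_connection object, and all connection handling moves behind a with block, which implies the target classes implement the context-manager protocol (presumably in the TargetSQL base class, which this diff does not show). A config sketch assembled from the keys read in this commit; every value is a placeholder, and target_s3 is only consulted by the Redshift target:

    {
      "target_connection": {
        "host": "localhost",
        "port": 5432,
        "database": "mydb",
        "username": "singer",
        "password": "secret"
      },
      "target_s3": {
        "aws_access_key_id": "AKIA...",
        "aws_secret_access_key": "...",
        "bucket": "my-bucket",
        "key_prefix": "singer/"
      }
    }

max_batch_rows, max_batch_size, and batch_detection_threshold remain optional top-level keys; batch_detection_threshold defaults to 5000.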
File renamed without changes.

target_postgres/singer_stream.py renamed to target_sql/singer_stream.py

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 from jsonschema import Draft4Validator, FormatChecker
 import arrow
 
-from target_postgres.pysize import get_size
+from target_sql.pysize import get_size
 
 SINGER_RECEIVED_AT = '_sdc_received_at'
 SINGER_BATCHED_AT = '_sdc_batched_at'

target_sql/target_postgres.py

Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
+import psycopg2
+from psycopg2 import sql
+
+from target_sql.target_sql import TargetSQL, TransformStream
+
+class TargetPostgres(TargetSQL):
+    def create_connection(self, config):
+        connection = config.get('target_connection')
+
+        self.conn = psycopg2.connect(
+            host=connection.get('host', 'localhost'),
+            port=connection.get('port', 5432),
+            dbname=connection.get('database'),
+            user=connection.get('username'),
+            password=connection.get('password'))
+
+    def destroy_connection(self):
+        self.conn.close()
+
+    def sql_to_json_schema(self, sql_type, nullable):
+        _format = None
+        if sql_type == 'timestamp with time zone':
+            json_type = 'string'
+            _format = 'date-time'
+        elif sql_type == 'bigint':
+            json_type = 'integer'
+        elif sql_type == 'double precision':
+            json_type = 'number'
+        elif sql_type == 'boolean':
+            json_type = 'boolean'
+        elif sql_type == 'text':
+            json_type = 'string'
+        else:
+            raise Exception('Unsupported type `{}` in existing target table'.format(sql_type))
+
+        if nullable:
+            json_type = ['null', json_type]
+
+        json_schema = {'type': json_type}
+        if _format:
+            json_schema['format'] = _format
+
+        return json_schema
+
+    def json_schema_to_sql(self, json_schema):
+        _type = json_schema['type']
+        not_null = True
+        if isinstance(_type, list):
+            ln = len(_type)
+            if ln == 1:
+                _type = _type[0]
+            if ln == 2 and 'null' in _type:
+                not_null = False
+                if _type.index('null') == 0:
+                    _type = _type[1]
+                else:
+                    _type = _type[0]
+            elif ln > 2:
+                raise Exception('Multiple types per column not supported')
+
+        sql_type = 'text'
+
+        if 'format' in json_schema and \
+           json_schema['format'] == 'date-time' and \
+           _type == 'string':
+            sql_type = 'timestamp with time zone'
+        elif _type == 'boolean':
+            sql_type = 'boolean'
+        elif _type == 'integer':
+            sql_type = 'bigint'
+        elif _type == 'number':
+            sql_type = 'double precision'
+
+        if not_null:
+            sql_type += ' NOT NULL'
+
+        return sql_type
+
+    def copy_rows(self, cur, table_name, headers, row_fn):
+        rows = TransformStream(row_fn)
+
+        copy = sql.SQL('COPY {}.{} ({}) FROM STDIN CSV').format(
+            sql.Identifier(self.catalog),
+            sql.Identifier(table_name),
+            sql.SQL(', ').join(map(sql.Identifier, headers)))
+        cur.copy_expert(copy, rows)
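
sql_to_json_schema and json_schema_to_sql are inverse mappings over the five supported column types. An illustrative round-trip, assuming pg is an already-initialized TargetPostgres instance (hypothetical setup, connection details omitted):

    # `pg` is a hypothetical, already-initialized TargetPostgres instance.
    pg.json_schema_to_sql({'type': ['null', 'string'], 'format': 'date-time'})
    # -> 'timestamp with time zone' (nullable, so no NOT NULL suffix)
    pg.json_schema_to_sql({'type': 'integer'})
    # -> 'bigint NOT NULL'
    pg.sql_to_json_schema('double precision', True)
    # -> {'type': ['null', 'number']}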

target_sql/target_redshift.py

Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,107 @@
+import uuid
+
+import boto3
+from psycopg2 import sql
+
+from target_sql.target_postgres import TargetPostgres, TransformStream
+
+class TargetRedshift(TargetPostgres):
+    def __init__(self, config, *args, **kwargs):
+        s3_config = config.get('target_s3')
+        if not s3_config:
+            raise Exception('`target_s3` required')
+        self.s3_config = s3_config
+
+        super(TargetRedshift, self).__init__(config, *args, **kwargs)
+
+    def sql_to_json_schema(self, sql_type, nullable):
+        _format = None
+        if sql_type == 'timestamp with time zone':
+            json_type = 'string'
+            _format = 'date-time'
+        elif sql_type == 'bigint':
+            json_type = 'integer'
+        elif sql_type == 'double precision':
+            json_type = 'number'
+        elif sql_type == 'boolean':
+            json_type = 'boolean'
+        elif sql_type[:7] == 'varchar':
+            json_type = 'string'
+        else:
+            raise Exception('Unsupported type `{}` in existing target table'.format(sql_type))
+
+        if nullable:
+            json_type = ['null', json_type]
+
+        json_schema = {'type': json_type}
+        if _format:
+            json_schema['format'] = _format
+
+        return json_schema
+
+    def json_schema_to_sql(self, json_schema):
+        _type = json_schema['type']
+        not_null = True
+        if isinstance(_type, list):
+            ln = len(_type)
+            if ln == 1:
+                _type = _type[0]
+            if ln == 2 and 'null' in _type:
+                not_null = False
+                if _type.index('null') == 0:
+                    _type = _type[1]
+                else:
+                    _type = _type[0]
+            elif ln > 2:
+                raise Exception('Multiple types per column not supported')
+
+        sql_type = 'varchar(65535)'
+
+        if 'format' in json_schema and \
+           json_schema['format'] == 'date-time' and \
+           _type == 'string':
+            sql_type = 'timestamp with time zone'
+        elif _type == 'boolean':
+            sql_type = 'boolean'
+        elif _type == 'integer':
+            sql_type = 'bigint'
+        elif _type == 'number':
+            sql_type = 'double precision'
+
+        if not_null:
+            sql_type += ' NOT NULL'
+
+        return sql_type
+
+    def copy_rows(self, cur, table_name, headers, row_fn):
+        s3_client = boto3.client(
+            's3',
+            aws_access_key_id=self.s3_config.get('aws_access_key_id'),
+            aws_secret_access_key=self.s3_config.get('aws_secret_access_key'))
+
+        bucket = self.s3_config.get('bucket')
+        if not bucket:
+            raise Exception('`target_s3.bucket` required')
+        prefix = self.s3_config.get('key_prefix', '')
+        key = prefix + table_name + self.NESTED_SEPARATOR + str(uuid.uuid4()).replace('-', '')
+
+        rows = TransformStream(row_fn, binary=True)
+
+        s3_client.upload_fileobj(
+            rows,
+            bucket,
+            key)
+
+        source = 's3://{}/{}'.format(bucket, key)
+        credentials = 'aws_access_key_id={};aws_secret_access_key={};'.format(
+            self.s3_config.get('aws_access_key_id'),
+            self.s3_config.get('aws_secret_access_key'))
+
+        copy_sql = sql.SQL('COPY {}.{} ({}) FROM {} CREDENTIALS {} FORMAT AS CSV').format(
+            sql.Identifier(self.catalog),
+            sql.Identifier(table_name),
+            sql.SQL(', ').join(map(sql.Identifier, headers)),
+            sql.Literal(source),
+            sql.Literal(credentials))
+
+        cur.execute(copy_sql)
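
Where the Postgres target streams rows directly into COPY ... FROM STDIN, the Redshift target first uploads the CSV stream to S3 under a per-batch key, then points Redshift at that object, passing the S3 credentials inline through the CREDENTIALS clause. The generated statement renders roughly as below; schema, table, columns, bucket, and key are illustrative, and the real key joins table_name to a hex UUID with NESTED_SEPARATOR, a constant inherited from the base class and not shown in this diff:

    COPY "public"."users" ("id", "name")
    FROM 's3://my-bucket/singer/users__3f2c9a...'
    CREDENTIALS 'aws_access_key_id=AKIA...;aws_secret_access_key=...;'
    FORMAT AS CSV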
