From 43ec3368b351de09eaef4103221fa7f0c804249d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 7 Jan 2021 22:21:52 +0000 Subject: [PATCH 1/4] Bump lxml from 4.5.2 to 4.6.2 Bumps [lxml](https://github.com/lxml/lxml) from 4.5.2 to 4.6.2. - [Release notes](https://github.com/lxml/lxml/releases) - [Changelog](https://github.com/lxml/lxml/blob/master/CHANGES.txt) - [Commits](https://github.com/lxml/lxml/compare/lxml-4.5.2...lxml-4.6.2) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 10665d2..4832127 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ argparse==1.2.1 libarchive-c==2.9 -lxml==4.5.2 +lxml==4.6.2 psycopg2-binary==2.8.4 six==1.10.0 From 49d8358a83bb573c63c2715b122afb2fca98376c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 12 Jun 2021 13:06:55 +0200 Subject: [PATCH 2/4] Bump lxml from 4.6.2 to 4.6.3 (#14) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4832127..196607c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ argparse==1.2.1 libarchive-c==2.9 -lxml==4.6.2 +lxml==4.6.3 psycopg2-binary==2.8.4 six==1.10.0 From 634c05739e99f1903b88e816939d98ccd2f1f834 Mon Sep 17 00:00:00 2001 From: Rodrigo Morales <74389646+rdrg109@users.noreply.github.com> Date: Sat, 4 Sep 2021 03:31:35 -0500 Subject: [PATCH 3/4] Create tables in the specified schema to avoid moving the tables afterwards (#21) Fixes #18. 
* Execute "moveTableToSchema" if table was successfully created * Function for getting the connection parameters added * Function buildConnectionString deleted This is because the parameters are now obtained through the function getConnectionParameters * dbConnectionParam and moveTableToSchema replaced by single function dbConnectionParam has been deleted because now all parameters are obtained by a single function which is called in the function handleTable. moveTableToSchema has been deleted because tables are now created in the specified schema. Therefore, there is no need to move the tables after their creation. Co-authored-by: rdrg109 <> --- load_into_pg.py | 73 +++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 48 deletions(-) diff --git a/load_into_pg.py b/load_into_pg.py index 32c05aa..2d6cef5 100755 --- a/load_into_pg.py +++ b/load_into_pg.py @@ -46,24 +46,30 @@ def show_progress(block_num, block_size, total_size): file_part = None six.print_("") +def getConnectionParameters(): + """Get the parameters for the connection to the database.""" -def buildConnectionString(dbname, mbHost, mbPort, mbUsername, mbPassword): - dbConnectionParam = "dbname={}".format(dbname) + parameters = {} - if mbPort is not None: - dbConnectionParam += " port={}".format(mbPort) + if args.dbname: + parameters['dbname'] = args.dbname - if mbHost is not None: - dbConnectionParam += " host={}".format(mbHost) + if args.host: + parameters['host'] = args.host - # TODO Is the escaping done here correct? 
- if mbPassword is not None: - dbConnectionParam += " password={}".format(mbPassword) - return dbConnectionParam + if args.username: + parameters['user'] = args.username + + if args.password: + parameters['password'] = args.password + + if args.schema_name: + parameters['options'] = "-c search_path=" + args.schema_name + + return parameters def _makeDefValues(keys): @@ -174,7 +180,7 @@ def _getTableKeys(table): return keys -def handleTable(table, insertJson, createFk, mbDbFile, dbConnectionParam): +def handleTable(table, insertJson, createFk, mbDbFile): """Handle the table including the post/pre processing.""" keys = _getTableKeys(table) dbFile = mbDbFile if mbDbFile is not None else table + ".xml" @@ -193,7 +199,7 @@ def handleTable(table, insertJson, createFk, mbDbFile, dbConnectionParam): sys.exit(-1) try: - with pg.connect(dbConnectionParam) as conn: + with pg.connect(**getConnectionParameters()) as conn: with conn.cursor() as cur: try: with open(dbFile, "rb") as xml: @@ -273,29 +279,8 @@ def handleTable(table, insertJson, createFk, mbDbFile, dbConnectionParam): six.print_("Warning from the database.", file=sys.stderr) six.print_("pg.Warning: {0}".format(str(w)), file=sys.stderr) - -def moveTableToSchema(table, schemaName, dbConnectionParam): - try: - with pg.connect(dbConnectionParam) as conn: - with conn.cursor() as cur: - # create the schema - cur.execute("CREATE SCHEMA IF NOT EXISTS " + schemaName + ";") - conn.commit() - # move the table to the right schema - cur.execute("ALTER TABLE " + table + " SET SCHEMA " + schemaName + ";") - conn.commit() - except pg.Error as e: - six.print_("Error in dealing with the database.", file=sys.stderr) - six.print_("pg.Error ({0}): {1}".format(e.pgcode, e.pgerror), file=sys.stderr) - six.print_(str(e), file=sys.stderr) - except pg.Warning as w: - six.print_("Warning from the database.", file=sys.stderr) - six.print_("pg.Warning: {0}".format(str(w)), file=sys.stderr) - - 
############################################################# - parser = argparse.ArgumentParser() parser.add_argument( "-t", @@ -384,10 +369,6 @@ def moveTableToSchema(table, schemaName, dbConnectionParam): except NameError: pass -dbConnectionParam = buildConnectionString( - args.dbname, args.host, args.port, args.username, args.password -) - # load given file in table if args.file and args.table: table = args.table @@ -398,14 +379,13 @@ def moveTableToSchema(table, schemaName, dbConnectionParam): specialRules[("Posts", "Body")] = "NULL" choice = input("This will drop the {} table. Are you sure [y/n]?".format(table)) + if len(choice) > 0 and choice[0].lower() == "y": handleTable( - table, args.insert_json, args.foreign_keys, args.file, dbConnectionParam - ) + table, args.insert_json, args.foreign_keys, args.file) else: six.print_("Cancelled.") - if args.schema_name != "public": - moveTableToSchema(table, args.schema_name, dbConnectionParam) + exit(0) # load a project @@ -453,7 +433,7 @@ def moveTableToSchema(table, schemaName, dbConnectionParam): for table in tables: six.print_("Load {0}.xml file".format(table)) - handleTable(table, args.insert_json, args.foreign_keys, None, dbConnectionParam) + handleTable(table, args.insert_json, args.foreign_keys, None) # remove file os.remove(table + ".xml") @@ -465,9 +445,6 @@ def moveTableToSchema(table, schemaName, dbConnectionParam): else: six.print_("Archive '{0}' deleted".format(filepath)) - if args.schema_name != "public": - for table in tables: - moveTableToSchema(table, args.schema_name, dbConnectionParam) exit(0) else: From 8bcb72858963daec7ee6688caef7a5f260f5489e Mon Sep 17 00:00:00 2001 From: Utkarsh Upadhyay <502876+musically-ut@users.noreply.github.com> Date: Sat, 4 Sep 2021 10:34:34 +0200 Subject: [PATCH 4/4] Acknowledge @rdrg109's contributions. 
--- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f9026c4..2781792 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,7 @@ and from [StackExchange Data Explorer](http://data.stackexchange.com). ## Quickstart -Install requirements, create a `stackoverflow` database, and use -`load_into_pg.py` script: +Install requirements, create a new database (e.g. `beerSO` below), and use `load_into_pg.py` script: ``` console $ pip install -r requirements.txt @@ -83,5 +82,6 @@ schema name: `SET search_path TO ;` ## Acknowledgement -[@madtibo](https://github.com/madtibo) made significant contributions by adding `jsonb` and Foreign Key support. -[@bersace](https://github.com/bersace) brought the dependencies and the `README.md` instructions into 2020. + - [@madtibo](https://github.com/madtibo) made significant contributions by adding `jsonb` and Foreign Key support. + - [@bersace](https://github.com/bersace) brought the dependencies and the `README.md` instructions into 2020s. + - [@rdrg109](https://github.com/rdrg109) simplified handling of non-public schemas and fixed bugs associated with re-importing tables.