diff --git a/datahub-kubernetes/datahub/templates/datahub-upgrade/datahub-upgrade-job.yml b/datahub-kubernetes/datahub/templates/datahub-upgrade/datahub-upgrade-job.yml
index 6c904cab23e90..2eecdbfcd9aed 100644
--- a/datahub-kubernetes/datahub/templates/datahub-upgrade/datahub-upgrade-job.yml
+++ b/datahub-kubernetes/datahub/templates/datahub-upgrade/datahub-upgrade-job.yml
@@ -39,7 +39,15 @@ spec:
       containers:
         - name: datahub-upgrade-job
           image: "{{ .Values.datahubUpgrade.image.repository }}:{{ .Values.datahubUpgrade.image.tag }}"
-          args: [ "-u", "NoCodeDataMigration", "-a", "batchSize=1000", "-a", "batchDelayMs=100" ]
+          args:
+            - "-u"
+            - "NoCodeDataMigration"
+            - "-a"
+            - "batchSize=1000"
+            - "-a"
+            - "batchDelayMs=100"
+            - "-a"
+            - "dbType={{ .Values.datahubUpgrade.noCodeDataMigration.sqlDbType }}"
           env:
             - name: DATAHUB_GMS_HOST
              value: {{ printf "%s-%s" .Release.Name "datahub-gms" }}
diff --git a/datahub-kubernetes/datahub/values.yaml b/datahub-kubernetes/datahub/values.yaml
index c6da52a720721..214f8faedab16 100644
--- a/datahub-kubernetes/datahub/values.yaml
+++ b/datahub-kubernetes/datahub/values.yaml
@@ -56,6 +56,8 @@ datahubUpgrade:
   image:
     repository: acryldata/datahub-upgrade
     tag: "v0.8.3"
+  noCodeDataMigration:
+    sqlDbType: "MYSQL"
 
 global:
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/CreateAspectTableStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/CreateAspectTableStep.java
index 55c8833f4f7b0..e46f81865eb77 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/CreateAspectTableStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/CreateAspectTableStep.java
@@ -7,9 +7,16 @@
 import io.ebean.EbeanServer;
 import java.util.function.Function;
 
-// Do we need SQL-tech specific migration paths?
 public class CreateAspectTableStep implements UpgradeStep {
 
+  private static final String DB_TYPE_ARG = "dbType";
+
+  enum DbType {
+    MYSQL,
+    POSTGRES,
+    MARIA
+  }
+
   private final EbeanServer _server;
 
   public CreateAspectTableStep(final EbeanServer server) {
@@ -29,18 +36,45 @@ public int retryCount() {
   @Override
   public Function<UpgradeContext, UpgradeStepResult> executable() {
     return (context) -> {
+
+      DbType targetDbType = context.parsedArgs().containsKey(DB_TYPE_ARG)
+          ? DbType.valueOf(context.parsedArgs().get(DB_TYPE_ARG).get())
+          : DbType.MYSQL;
+
+      String sqlUpdateStr;
+
+      switch (targetDbType) {
+        case POSTGRES:
+          sqlUpdateStr = "CREATE TABLE IF NOT EXISTS metadata_aspect_v2 (\n"
+              + "  urn varchar(500) not null,\n"
+              + "  aspect varchar(200) not null,\n"
+              + "  version bigint not null,\n"
+              + "  metadata text not null,\n"
+              + "  systemmetadata text,\n"
+              + "  createdon timestamp not null,\n"
+              + "  createdby varchar(255) not null,\n"
+              + "  createdfor varchar(255),\n"
+              + "  constraint pk_metadata_aspect_v2 primary key (urn,aspect,version)\n"
+              + ")";
+          break;
+        default:
+          // both mysql and maria
+          sqlUpdateStr = "CREATE TABLE IF NOT EXISTS metadata_aspect_v2 (\n"
+              + "  urn varchar(500) not null,\n"
+              + "  aspect varchar(200) not null,\n"
+              + "  version bigint(20) not null,\n"
+              + "  metadata longtext not null,\n"
+              + "  systemmetadata longtext,\n"
+              + "  createdon datetime(6) not null,\n"
+              + "  createdby varchar(255) not null,\n"
+              + "  createdfor varchar(255),\n"
+              + "  constraint pk_metadata_aspect_v2 primary key (urn,aspect,version)\n"
+              + ")";
+          break;
+      }
+
       try {
-        _server.execute(_server.createSqlUpdate("CREATE TABLE IF NOT EXISTS metadata_aspect_v2 (\n"
-            + "  urn varchar(500) not null,\n"
-            + "  aspect varchar(200) not null,\n"
-            + "  version bigint(20) not null,\n"
-            + "  metadata longtext not null,\n"
-            + "  systemmetadata longtext,\n"
-            + "  createdon datetime(6) not null,\n"
-            + "  createdby varchar(255) not null,\n"
-            + "  createdfor varchar(255),\n"
-            + "  constraint pk_metadata_aspect primary key (urn,aspect,version)\n"
-            + ")"));
+        _server.execute(_server.createSqlUpdate(sqlUpdateStr));
       } catch (Exception e) {
         context.report().addLine(String.format("Failed to create table metadata_aspect_v2: %s", e.toString()));
         return new DefaultUpgradeStepResult(
diff --git a/docker/datahub-upgrade/README.md b/docker/datahub-upgrade/README.md
index c9305e9a0c07e..3a32e0ee818ec 100644
--- a/docker/datahub-upgrade/README.md
+++ b/docker/datahub-upgrade/README.md
@@ -10,6 +10,7 @@ As of today, there are 2 supported upgrades:
    to metadata_aspect_v2 table. Arguments:
    - *batchSize* (Optional): The number of rows to migrate at a time. Defaults to 1000.
    - *batchDelayMs* (Optional): The number of milliseconds of delay between migrated batches. Used for rate limiting. Defaults to 250.
+   - *dbType* (Optional): The target DB type. Valid values are `MYSQL`, `MARIA`, `POSTGRES`. Defaults to `MYSQL`.
 2. **NoCodeDataMigrationCleanup**: Cleanses graph index, search index, and key-value store of legacy DataHub data (metadata_aspect table) once
    the No Code Data Migration has completed successfully. No arguments.
 
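The dbType handling added above boils down to a small argument-resolution step. The following standalone sketch (not part of the patch) illustrates the intended fallback behavior — default to MySQL when no `-a dbType=...` argument is supplied, otherwise map the raw string onto the enum — using a plain `Map<String, Optional<String>>` as a simplified stand-in for the real `UpgradeContext.parsedArgs()`:

```java
import java.util.Map;
import java.util.Optional;

public class DbTypeResolutionSketch {

  // Mirrors the enum introduced in CreateAspectTableStep.
  enum DbType { MYSQL, POSTGRES, MARIA }

  // Simplified stand-in for UpgradeContext.parsedArgs(): argument name -> optional value.
  static DbType resolveDbType(Map<String, Optional<String>> parsedArgs) {
    return parsedArgs.containsKey("dbType")
        ? DbType.valueOf(parsedArgs.get("dbType").get())
        : DbType.MYSQL; // default when -a dbType=... is not passed
  }

  public static void main(String[] args) {
    System.out.println(resolveDbType(Map.of()));                                  // MYSQL
    System.out.println(resolveDbType(Map.of("dbType", Optional.of("POSTGRES")))); // POSTGRES
  }
}
```

Note that, as in the patch, an unrecognized value (for example `dbType=ORACLE`) would cause `DbType.valueOf` to throw an `IllegalArgumentException` rather than silently falling back to MySQL.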
diff --git a/docker/mariadb/init.sql b/docker/mariadb/init.sql
index ad3e46fc379fe..f1a055808492f 100644
--- a/docker/mariadb/init.sql
+++ b/docker/mariadb/init.sql
@@ -8,7 +8,7 @@ create table metadata_aspect_v2 (
   createdon datetime(6) not null,
   createdby varchar(255) not null,
   createdfor varchar(255),
-  constraint pk_metadata_aspect primary key (urn,aspect,version)
+  constraint pk_metadata_aspect_v2 primary key (urn,aspect,version)
 );
 
 insert into metadata_aspect_v2 (urn, aspect, version, metadata, createdon, createdby) values(
diff --git a/docker/mysql-setup/init.sql b/docker/mysql-setup/init.sql
index abd2be37fe089..926e1e85d05de 100644
--- a/docker/mysql-setup/init.sql
+++ b/docker/mysql-setup/init.sql
@@ -12,7 +12,7 @@ create table if not exists metadata_aspect_v2 (
   createdon datetime(6) not null,
   createdby varchar(255) not null,
   createdfor varchar(255),
-  constraint pk_metadata_aspect primary key (urn,aspect,version)
+  constraint pk_metadata_aspect_v2 primary key (urn,aspect,version)
 );
 
 -- create default records for datahub user if not exists
diff --git a/docker/mysql/init.sql b/docker/mysql/init.sql
index cd5427cfcfcb2..d5d82fd25a0e5 100644
--- a/docker/mysql/init.sql
+++ b/docker/mysql/init.sql
@@ -8,7 +8,7 @@ CREATE TABLE metadata_aspect_v2 (
   createdon datetime(6) NOT NULL,
   createdby VARCHAR(255) NOT NULL,
   createdfor VARCHAR(255),
-  CONSTRAINT pk_metadata_aspect PRIMARY KEY (urn,aspect,version)
+  CONSTRAINT pk_metadata_aspect_v2 PRIMARY KEY (urn,aspect,version)
 );
 
 INSERT INTO metadata_aspect_v2 (urn, aspect, version, metadata, createdon, createdby) VALUES(
diff --git a/docker/postgres/init.sql b/docker/postgres/init.sql
index 1959ad04016ee..9936acc4463e4 100644
--- a/docker/postgres/init.sql
+++ b/docker/postgres/init.sql
@@ -2,13 +2,13 @@
 create table metadata_aspect_v2 (
   urn varchar(500) not null,
   aspect varchar(200) not null,
-  version bigint(20) not null,
+  version bigint not null,
   metadata text not null,
   systemmetadata text,
   createdon timestamp not null,
   createdby varchar(255) not null,
   createdfor varchar(255),
-  constraint pk_metadata_aspect primary key (urn,aspect,version)
+  constraint pk_metadata_aspect_v2 primary key (urn,aspect,version)
 );
 
 insert into metadata_aspect_v2 (urn, aspect, version, metadata, createdon, createdby) values(
diff --git a/docs/advanced/no-code-upgrade.md b/docs/advanced/no-code-upgrade.md
index 734fa10538a18..c5247d7218f99 100644
--- a/docs/advanced/no-code-upgrade.md
+++ b/docs/advanced/no-code-upgrade.md
@@ -64,22 +64,31 @@ cd docker/datahub-upgrade/nocode
 ./run_upgrade.sh
 ```
 
-In both cases, the default environment variables will be used (`docker/datahub-upgrade/env/docker.env`). These assume
-that your deployment is local. If this is not the case, you'll need to define your own environment variables to tell the
-upgrade system where your DataHub containers reside.
+Using this command, the default environment variables will be used (`docker/datahub-upgrade/env/docker.env`). These assume
+that your deployment is local and that you are running MySQL. If this is not the case, you'll need to define your own environment variables to tell the
+upgrade system where your DataHub containers reside and run.
 
-You can either
+To update the default environment variables, you can either
 
 1. Change `docker/datahub-upgrade/env/docker.env` in place and then run one of the above commands
    OR
-2. Define a new ".env" file containing your variables and
-   execute `docker pull acryldata/datahub-upgrade && docker run acryldata/datahub-upgrade:latest -u NoCodeDataMigration`
+2. Define a new ".env" file containing your variables and execute `docker pull acryldata/datahub-upgrade && docker run acryldata/datahub-upgrade:latest -u NoCodeDataMigration`
 
 To see the required environment variables, see the [datahub-upgrade](../../docker/datahub-upgrade/README.md) documentation.
 
+To run the upgrade against a database other than MySQL, you can use the `-a dbType=` argument.
+
+Execute
+```
+./docker/datahub-upgrade.sh -u NoCodeDataMigration -a dbType=POSTGRES
+```
+
+where `dbType` can be one of `MYSQL`, `MARIA`, or `POSTGRES`.
+
 #### Docker Compose Deployments - Lose All Existing Data
 
-This path is quickest but will wipe your Datahub's database.
+This path is quickest but will wipe your DataHub's database.
+
 If you want to make sure your current data is migrated, refer to the Docker Compose Deployments - Preserve Data section above.
 If you are ok losing your data and re-ingesting, this approach is simplest.
 
@@ -98,7 +107,7 @@ git pull origin master
 ./docker/ingestion/ingestion.sh
 ```
 
-After that, you will be upgraded and good to go.
+After that, you will be ready to go.
 
 ##### How to fix the "listening to port 5005" issue
 
@@ -130,6 +139,9 @@ Once the storage layer has been migrated, subsequent runs of this job will be a
 
 ### Step 3 (Optional): Cleaning Up
 
+Warning: This step clears all legacy metadata. If something is wrong with the upgraded metadata, there will be no easy way to
+re-run the migration.
+
 This step involves removing data from previous versions of DataHub. This step should only be performed once you've validated
 that your DataHub deployment is healthy after performing the upgrade. If you're able to search, browse, and view your Metadata
 after the upgrade steps have been completed, you should be in good shape.
 
@@ -147,11 +159,11 @@ cd docker/datahub-upgrade/nocode
 ./run_clean.sh
 ```
 
-In both cases, the default environment variables will be used (`docker/datahub-upgrade/env/docker.env`). These assume
+Using this command, the default environment variables will be used (`docker/datahub-upgrade/env/docker.env`). These assume
 that your deployment is local. If this is not the case, you'll need to define your own environment variables to tell the
 upgrade system where your DataHub containers reside.
 
-You can either
+To update the default environment variables, you can either
 
 1. Change `docker/datahub-upgrade/env/docker.env` in place and then run one of the above commands
    OR
 2. Define a new ".env" file containing your variables and execute
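Before running the cleanup step added above (Step 3), it can help to confirm that the migrated `metadata_aspect_v2` table is actually populated. The sketch below is a hypothetical JDBC check, not part of the upgrade tooling or this patch; the connection URL, credentials, and the MySQL driver on the classpath are assumptions for a default local deployment and should be adjusted (or swapped for the Postgres/MariaDB equivalents) to match your setup:

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class MigrationSanityCheck {
  public static void main(String[] args) throws Exception {
    // Assumed local defaults; change host, database, user, and password for your deployment.
    String url = "jdbc:mysql://localhost:3306/datahub";
    try (Connection conn = DriverManager.getConnection(url, "datahub", "datahub");
         Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM metadata_aspect_v2")) {
      if (rs.next()) {
        // A non-zero count suggests the NoCodeDataMigration step wrote into the new table.
        System.out.println("Rows in metadata_aspect_v2: " + rs.getLong(1));
      }
    }
  }
}
```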