[SPARK-19115] [SQL] Supporting Create External Table Like Location #16638
In SparkSqlAstBuilder:

```diff
@@ -1140,14 +1140,18 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
    *
    * For example:
    * {{{
-   *   CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
-   *   LIKE [other_db_name.]existing_table_name
+   *   CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
+   *   LIKE [other_db_name.]existing_table_name [locationSpec]
    * }}}
    */
   override def visitCreateTableLike(ctx: CreateTableLikeContext): LogicalPlan = withOrigin(ctx) {
     val targetTable = visitTableIdentifier(ctx.target)
     val sourceTable = visitTableIdentifier(ctx.source)
-    CreateTableLikeCommand(targetTable, sourceTable, ctx.EXISTS != null)
+    val location = Option(ctx.locationSpec).map(visitLocationSpec)
+    if (ctx.EXTERNAL != null && location.isEmpty) {
+      operationNotAllowed("CREATE EXTERNAL TABLE LIKE must be accompanied by LOCATION", ctx)
+    }
+    CreateTableLikeCommand(targetTable, sourceTable, location, ctx.EXISTS != null)
   }

   /**
```
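For reference, a minimal sketch of how the three syntax variants behave after this change, assuming a `SparkSession` named `spark` with Hive support and an existing table `t2` (both illustrative, not part of the diff):

```scala
// Plain LIKE: ctx.locationSpec is null, so location = None and the copy stays managed.
spark.sql("CREATE TABLE t1 LIKE t2")

// LIKE ... LOCATION: location = Some("/tmp/t1"), so the copy becomes an external table.
spark.sql("CREATE TABLE t1_ext LIKE t2 LOCATION '/tmp/t1'")

// EXTERNAL without LOCATION is rejected by the new guard via operationNotAllowed:
// spark.sql("CREATE EXTERNAL TABLE t1_bad LIKE t2")
// => parse error: CREATE EXTERNAL TABLE LIKE must be accompanied by LOCATION
```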
In CreateTableLikeCommand:

```diff
@@ -42,7 +42,7 @@ import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils

 /**
- * A command to create a MANAGED table with the same definition of the given existing table.
+ * A command to create a table with the same definition of the given existing table.
  * In the target table definition, the table comment is always empty but the column comments
  * are identical to the ones defined in the source table.
  *
@@ -51,13 +51,14 @@ import org.apache.spark.util.Utils
  *
  * The syntax of using this command in SQL is:
  * {{{
- *   CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
- *   LIKE [other_db_name.]existing_table_name
+ *   CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
+ *   LIKE [other_db_name.]existing_table_name [locationSpec]
  * }}}
  */
 case class CreateTableLikeCommand(
     targetTable: TableIdentifier,
     sourceTable: TableIdentifier,
+    location: Option[String],
     ifNotExists: Boolean) extends RunnableCommand {

   override def run(sparkSession: SparkSession): Seq[Row] = {
```

Member (on `case class CreateTableLikeCommand`): Please update the comment of this class.

Author: OK, I will update it later. Thanks!
```diff
@@ -70,12 +71,19 @@ case class CreateTableLikeCommand(
       sourceTableDesc.provider
     }

+    val tblType = if (location.isEmpty) {
+      CatalogTableType.MANAGED
+    } else {
+      CatalogTableType.EXTERNAL
+    }
+
     val newTableDesc =
       CatalogTable(
         identifier = targetTable,
-        tableType = CatalogTableType.MANAGED,
-        // We are creating a new managed table, which should not have custom table location.
-        storage = sourceTableDesc.storage.copy(locationUri = None),
+        tableType = tblType,
+        // If location is defined, the table we are creating is an external table;
+        // otherwise it is a managed table.
+        storage = sourceTableDesc.storage.copy(locationUri = location),
         schema = sourceTableDesc.schema,
         provider = newProvider,
         partitionColumnNames = sourceTableDesc.partitionColumnNames,
```
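The observable effect of the new `location` field, sketched under the same assumptions as above (`spark` with Hive support, an existing source table `src`); this mirrors what the test changes below assert:

```scala
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType

val catalog = spark.sessionState.catalog

// Without LOCATION the copy is MANAGED and gets a default warehouse location.
spark.sql("CREATE TABLE dst1 LIKE src")
assert(catalog.getTableMetadata(TableIdentifier("dst1")).tableType == CatalogTableType.MANAGED)

// With LOCATION the copy is EXTERNAL and points at the supplied path.
spark.sql("CREATE TABLE dst2 LIKE src LOCATION '/tmp/dst2'")
assert(catalog.getTableMetadata(TableIdentifier("dst2")).tableType == CatalogTableType.EXTERNAL)
```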
In HiveDDLCommandSuite:

```diff
@@ -518,8 +518,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle

   test("create table like") {
     val v1 = "CREATE TABLE table1 LIKE table2"
-    val (target, source, exists) = parser.parsePlan(v1).collect {
-      case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting)
+    val (target, source, location, exists) = parser.parsePlan(v1).collect {
+      case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
     }.head
     assert(exists == false)
     assert(target.database.isEmpty)
@@ -528,8 +528,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
     assert(source.table == "table2")

     val v2 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2"
-    val (target2, source2, exists2) = parser.parsePlan(v2).collect {
-      case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting)
+    val (target2, source2, location2, exists2) = parser.parsePlan(v2).collect {
+      case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
     }.head
     assert(exists2)
     assert(target2.database.isEmpty)
```

Contributor (on the changed pattern match): add an assert to check

Contributor (on `val v2 = ...`): add one more test case to check CREATE TABLE LIKE with location
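A sketch of the additional parser test the reviewer asks for, following the pattern of the existing cases (the literal path is illustrative):

```scala
val v3 = "CREATE TABLE table1 LIKE table2 LOCATION '/spark/warehouse'"
val (target3, source3, location3, exists3) = parser.parsePlan(v3).collect {
  case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
}.head
assert(exists3 == false)
// The parsed command should surface the location, so the command creates an EXTERNAL table.
assert(location3 == Some("/spark/warehouse"))
```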
In HiveDDLSuite:

```diff
@@ -826,7 +826,32 @@ class HiveDDLSuite
         val targetTable = spark.sessionState.catalog.getTableMetadata(
           TableIdentifier(targetTabName, Some("default")))

-        checkCreateTableLike(sourceTable, targetTable)
+        checkCreateTableLike(sourceTable, targetTable, "MANAGED")
       }
     }
   }

+  test("CREATE [EXTERNAL] TABLE LIKE a temporary view LOCATION...") {
+    for (i <- 0 to 1) {
+      withTempDir { tmpDir =>
+        val sourceViewName = "tab1"
+        val targetTabName = "tab2"
+        val basePath = tmpDir.toURI
+        withTempView(sourceViewName) {
+          withTable(targetTabName) {
+            spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
+              .createTempView(sourceViewName)
+            val tblType = if (i == 0) "" else "EXTERNAL"
+            sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION '$basePath'")
+
+            val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata(
+              TableIdentifier(sourceViewName))
+            val targetTable = spark.sessionState.catalog.getTableMetadata(
+              TableIdentifier(targetTabName, Some("default")))
+
+            checkCreateTableLike(sourceTable, targetTable, "EXTERNAL")
+          }
+        }
+      }
+    }
+  }
@@ -847,7 +872,35 @@ class HiveDDLSuite
       assert(DDLUtils.isDatasourceTable(sourceTable))
       assert(sourceTable.tableType == CatalogTableType.MANAGED)

-      checkCreateTableLike(sourceTable, targetTable)
+      checkCreateTableLike(sourceTable, targetTable, "MANAGED")
     }
   }

+  test("CREATE [EXTERNAL] TABLE LIKE a data source table LOCATION...") {
+    for (i <- 0 to 1) {
+      withTempDir { tmpDir =>
+        val sourceTabName = "tab1"
+        val targetTabName = "tab2"
+        val basePath = tmpDir.toURI
+        withTable(sourceTabName, targetTabName) {
+          spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
+            .write.format("json").saveAsTable(sourceTabName)
+          val tblType = if (i == 0) "" else "EXTERNAL"
+          sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'")
+
+          val sourceTable =
+            spark.sessionState.catalog.getTableMetadata(
+              TableIdentifier(sourceTabName, Some("default")))
+          val targetTable =
+            spark.sessionState.catalog.getTableMetadata(
+              TableIdentifier(targetTabName, Some("default")))
+          // The source table should be a managed data source table
+          assert(DDLUtils.isDatasourceTable(sourceTable))
+          assert(sourceTable.tableType == CatalogTableType.MANAGED)
+
+          checkCreateTableLike(sourceTable, targetTable, "EXTERNAL")
+        }
+      }
+    }
+  }
@@ -871,7 +924,38 @@ class HiveDDLSuite
         assert(DDLUtils.isDatasourceTable(sourceTable))
         assert(sourceTable.tableType == CatalogTableType.EXTERNAL)

-        checkCreateTableLike(sourceTable, targetTable)
+        checkCreateTableLike(sourceTable, targetTable, "MANAGED")
       }
     }
   }

+  test("CREATE [EXTERNAL] TABLE LIKE an external data source table LOCATION...") {
+    for (i <- 0 to 1) {
+      withTempDir { tmpDir =>
+        val sourceTabName = "tab1"
+        val targetTabName = "tab2"
+        val basePath = tmpDir.toURI
+        withTable(sourceTabName, targetTabName) {
+          withTempPath { dir =>
+            val path = dir.getCanonicalPath
+            spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
+              .write.format("parquet").save(path)
+            sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')")
+            val tblType = if (i == 0) "" else "EXTERNAL"
+            sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'")
+
+            val sourceTable = spark.sessionState.catalog.getTableMetadata(
+              TableIdentifier(sourceTabName, Some("default")))
+            val targetTable = spark.sessionState.catalog.getTableMetadata(
+              TableIdentifier(targetTabName, Some("default")))
+            // The source table should be an external data source table
+            assert(DDLUtils.isDatasourceTable(sourceTable))
+            assert(sourceTable.tableType == CatalogTableType.EXTERNAL)
+
+            checkCreateTableLike(sourceTable, targetTable, "EXTERNAL")
+          }
+        }
+      }
+    }
+  }
@@ -889,7 +973,32 @@ class HiveDDLSuite
       assert(sourceTable.properties.get("prop1").nonEmpty)
       val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))

-      checkCreateTableLike(sourceTable, targetTable)
+      checkCreateTableLike(sourceTable, targetTable, "MANAGED")
     }
   }

+  test("CREATE [EXTERNAL] TABLE LIKE a managed Hive serde table LOCATION...") {
+    for (i <- 0 to 1) {
+      val catalog = spark.sessionState.catalog
+      withTempDir { tmpDir =>
+        val sourceTabName = "tab1"
+        val targetTabName = "tab2"
+        val basePath = tmpDir.toURI
+        withTable(sourceTabName, targetTabName) {
+          sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'")
+          val tblType = if (i == 0) "" else "EXTERNAL"
+          sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'")
+
+          val sourceTable = catalog.getTableMetadata(
+            TableIdentifier(sourceTabName, Some("default")))
+          assert(sourceTable.tableType == CatalogTableType.MANAGED)
+          assert(sourceTable.properties.get("prop1").nonEmpty)
+          val targetTable = catalog.getTableMetadata(
+            TableIdentifier(targetTabName, Some("default")))
+
+          checkCreateTableLike(sourceTable, targetTable, "EXTERNAL")
+        }
+      }
+    }
+  }
@@ -923,11 +1032,55 @@ class HiveDDLSuite
       assert(sourceTable.comment == Option("Apache Spark"))
       val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))

-      checkCreateTableLike(sourceTable, targetTable)
+      checkCreateTableLike(sourceTable, targetTable, "MANAGED")
     }
   }
 }

+  test("CREATE [EXTERNAL] TABLE LIKE an external Hive serde table LOCATION...") {
+    for (i <- 0 to 1) {
+      val catalog = spark.sessionState.catalog
+      withTempDir { tmpDir =>
+        val basePath = tmpDir.toURI
+        withTempDir { tmpDir1 =>
+          val basePath1 = tmpDir1.toURI
+          val sourceTabName = "tab1"
+          val targetTabName = "tab2"
+          withTable(sourceTabName, targetTabName) {
+            assert(tmpDir.listFiles.isEmpty)
+            sql(
+              s"""
+                 |CREATE EXTERNAL TABLE $sourceTabName (key INT comment 'test', value STRING)
+                 |COMMENT 'Apache Spark'
+                 |PARTITIONED BY (ds STRING, hr STRING)
+                 |LOCATION '$basePath'
+               """.stripMargin)
+            for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) {
+              sql(
+                s"""
+                   |INSERT OVERWRITE TABLE $sourceTabName
+                   |partition (ds='$ds',hr='$hr')
+                   |SELECT 1, 'a'
+                 """.stripMargin)
+            }
+            val tblType = if (i == 0) "" else "EXTERNAL"
+            sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath1'")
+
+            val sourceTable = catalog.getTableMetadata(
+              TableIdentifier(sourceTabName, Some("default")))
+            assert(sourceTable.tableType == CatalogTableType.EXTERNAL)
+            assert(sourceTable.comment == Option("Apache Spark"))
+            val targetTable = catalog.getTableMetadata(
+              TableIdentifier(targetTabName, Some("default")))
+
+            checkCreateTableLike(sourceTable, targetTable, "EXTERNAL")
+          }
+        }
+      }
+    }
+  }

   test("CREATE TABLE LIKE a view") {
     val sourceTabName = "tab1"
     val sourceViewName = "view"
@@ -947,15 +1100,51 @@ class HiveDDLSuite
         val targetTable = spark.sessionState.catalog.getTableMetadata(
           TableIdentifier(targetTabName, Some("default")))

-        checkCreateTableLike(sourceView, targetTable)
+        checkCreateTableLike(sourceView, targetTable, "MANAGED")
       }
     }
   }

+  test("CREATE [EXTERNAL] TABLE LIKE a view LOCATION...") {
+    for (i <- 0 to 1) {
+      withTempDir { tmpDir =>
+        val sourceTabName = "tab1"
+        val sourceViewName = "view"
+        val targetTabName = "tab2"
+        val basePath = tmpDir.toURI
+        withTable(sourceTabName, targetTabName) {
+          withView(sourceViewName) {
+            spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
+              .write.format("json").saveAsTable(sourceTabName)
+            sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName")
+            val tblType = if (i == 0) "" else "EXTERNAL"
+            sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION '$basePath'")
+
+            val sourceView = spark.sessionState.catalog.getTableMetadata(
+              TableIdentifier(sourceViewName, Some("default")))
+            // The original source should be a VIEW with an empty path
+            assert(sourceView.tableType == CatalogTableType.VIEW)
+            assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty)
+            val targetTable = spark.sessionState.catalog.getTableMetadata(
+              TableIdentifier(targetTabName, Some("default")))
+
+            checkCreateTableLike(sourceView, targetTable, "EXTERNAL")
+          }
+        }
+      }
+    }
+  }

-  private def checkCreateTableLike(sourceTable: CatalogTable, targetTable: CatalogTable): Unit = {
-    // The created table should be a MANAGED table with empty view text and original text.
-    assert(targetTable.tableType == CatalogTableType.MANAGED,
-      "the created table must be a Hive managed table")
+  private def checkCreateTableLike(
+      sourceTable: CatalogTable,
+      targetTable: CatalogTable,
+      tableType: String): Unit = {
+    // The created table should be a MANAGED or EXTERNAL table with empty view text
+    // and original text.
+    val expectTableType = CatalogTableType.apply(tableType)
+    assert(targetTable.tableType == expectTableType,
+      s"the created table must be a Hive ${expectTableType.name} table")
     assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty,
       "the view text and original text in the created table must be empty")
     assert(targetTable.comment.isEmpty,
```
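The refactored helper only checks the table type; a natural follow-up (hypothetical, not part of this diff) would also assert that an EXTERNAL copy actually carries the requested location:

```scala
// Hypothetical extra check inside checkCreateTableLike:
if (targetTable.tableType == CatalogTableType.EXTERNAL) {
  assert(targetTable.storage.locationUri.nonEmpty,
    "an external table created by CREATE TABLE LIKE ... LOCATION must keep its location")
}
```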
Review discussion on checkCreateTableLike:

- Since Spark 2.2 we want to hide the managed/external concept from users. It looks reasonable to add a `LOCATION` clause to `CREATE TABLE LIKE`, but do we really need the `EXTERNAL` keyword? We don't have to match Hive exactly.
- I am fine with that.
- OK, then let's simplify the logic: if `location` is specified, we create an external table internally; otherwise, we create a managed table.
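Under that proposal, the parser rule sketched earlier would shrink to something like the following (an assumption about the follow-up change, not code from this diff): the EXTERNAL keyword and its guard disappear, and the table type is inferred from LOCATION alone.

```scala
override def visitCreateTableLike(ctx: CreateTableLikeContext): LogicalPlan = withOrigin(ctx) {
  val targetTable = visitTableIdentifier(ctx.target)
  val sourceTable = visitTableIdentifier(ctx.source)
  // If LOCATION is present, CreateTableLikeCommand creates an EXTERNAL table;
  // otherwise a MANAGED one. No EXTERNAL keyword is accepted in the syntax.
  val location = Option(ctx.locationSpec).map(visitLocationSpec)
  CreateTableLikeCommand(targetTable, sourceTable, location, ctx.EXISTS != null)
}
```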