Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ statement
rowFormat? createFileFormat? locationSpec?
(TBLPROPERTIES tablePropertyList)?
(AS? query)? #createHiveTable
| CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier
LIKE source=tableIdentifier #createTableLike
| CREATE EXTERNAL? TABLE (IF NOT EXISTS)? target=tableIdentifier
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since Spark 2.2, we want to hide the managed/external concept from users. It looks reasonable to add a LOCATION clause to CREATE TABLE LIKE, but do we really need the EXTERNAL keyword? We don't need to be exactly the same as Hive.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am fine

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, then let's simplify the logic: if a location is specified, we create an external table internally; otherwise, we create a managed table.

LIKE source=tableIdentifier locationSpec? #createTableLike
| ANALYZE TABLE tableIdentifier partitionSpec? COMPUTE STATISTICS
(identifier | FOR COLUMNS identifierSeq)? #analyze
| ALTER (TABLE | VIEW) from=tableIdentifier
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1140,14 +1140,18 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
*
* For example:
* {{{
* CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
* LIKE [other_db_name.]existing_table_name
* CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
* LIKE [other_db_name.]existing_table_name [locationSpec]
* }}}
*/
override def visitCreateTableLike(ctx: CreateTableLikeContext): LogicalPlan = withOrigin(ctx) {
val targetTable = visitTableIdentifier(ctx.target)
val sourceTable = visitTableIdentifier(ctx.source)
CreateTableLikeCommand(targetTable, sourceTable, ctx.EXISTS != null)
val location = Option(ctx.locationSpec).map(visitLocationSpec)
if (ctx.EXTERNAL != null && location.isEmpty) {
Copy link
Member

@gatorsmile gatorsmile Jan 24, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment above this line:

    // If we are creating an EXTERNAL table, then the LOCATION field is required

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I'll do it later, Thanks!

operationNotAllowed("CREATE EXTERNAL TABLE LIKE must be accompanied by LOCATION", ctx)
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To the other reviewers, we are following what we did in visitCreateHiveTable

CreateTableLikeCommand(targetTable, sourceTable, location, ctx.EXISTS != null)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import org.apache.spark.sql.types._
import org.apache.spark.util.Utils

/**
* A command to create a MANAGED table with the same definition of the given existing table.
* A command to create a table with the same definition of the given existing table.
* In the target table definition, the table comment is always empty but the column comments
* are identical to the ones defined in the source table.
*
Expand All @@ -51,13 +51,14 @@ import org.apache.spark.util.Utils
*
* The syntax of using this command in SQL is:
* {{{
* CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
* LIKE [other_db_name.]existing_table_name
* CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no EXTERNAL

* LIKE [other_db_name.]existing_table_name [locationSpec]
* }}}
*/
case class CreateTableLikeCommand(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please update the comment of this class.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I will update it later. Thanks!

targetTable: TableIdentifier,
sourceTable: TableIdentifier,
location: Option[String],
ifNotExists: Boolean) extends RunnableCommand {

override def run(sparkSession: SparkSession): Seq[Row] = {
Expand All @@ -70,12 +71,19 @@ case class CreateTableLikeCommand(
sourceTableDesc.provider
}

val tblType = if (location.isEmpty) {
CatalogTableType.MANAGED
} else {
CatalogTableType.EXTERNAL
}

val newTableDesc =
CatalogTable(
identifier = targetTable,
tableType = CatalogTableType.MANAGED,
// We are creating a new managed table, which should not have custom table location.
storage = sourceTableDesc.storage.copy(locationUri = None),
tableType = tblType,
// If location is not empty the table we are creating is a new external table
// otherwise managed table.
storage = sourceTableDesc.storage.copy(locationUri = location),
schema = sourceTableDesc.schema,
provider = newProvider,
partitionColumnNames = sourceTableDesc.partitionColumnNames,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -518,8 +518,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle

test("create table like") {
val v1 = "CREATE TABLE table1 LIKE table2"
val (target, source, exists) = parser.parsePlan(v1).collect {
case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting)
val (target, source, location, exists) = parser.parsePlan(v1).collect {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add an assert to check location is empty

case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
}.head
assert(exists == false)
assert(target.database.isEmpty)
Expand All @@ -528,8 +528,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
assert(source.table == "table2")

val v2 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add one more test case to check CREATE TABLE LIKE with location

val (target2, source2, exists2) = parser.parsePlan(v2).collect {
case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting)
val (target2, source2, location2, exists2) = parser.parsePlan(v2).collect {
case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
}.head
assert(exists2)
assert(target2.database.isEmpty)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,32 @@ class HiveDDLSuite
val targetTable = spark.sessionState.catalog.getTableMetadata(
TableIdentifier(targetTabName, Some("default")))

checkCreateTableLike(sourceTable, targetTable)
checkCreateTableLike(sourceTable, targetTable, "MANAGED")
}
}
}

test("CREATE [EXTERNAL] TABLE LIKE a temporary view LOCATION...") {
for ( i <- 0 to 1 ) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can create a method with parameter location: Option[String], instead of writing a for loop with 2 iterations...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wrote it this way to reuse this common piece of code, because the basic logic of these two scenarios is almost the same.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Creating a method that wraps this piece of code also lets you reuse the code.

withTempDir {tmpDir =>
val sourceViewName = "tab1"
val targetTabName = "tab2"
val basePath = tmpDir.toURI
withTempView(sourceViewName) {
withTable(targetTabName) {
spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
.createTempView(sourceViewName)
val tblType = if (i == 0) "" else "EXTERNAL"
sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION $basePath")

val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata(
TableIdentifier(sourceViewName))
val targetTable = spark.sessionState.catalog.getTableMetadata(
TableIdentifier(targetTabName, Some("default")))

checkCreateTableLike(sourceTable, targetTable, "EXTERNAL")
}
}
}
}
}
Expand All @@ -847,7 +872,35 @@ class HiveDDLSuite
assert(DDLUtils.isDatasourceTable(sourceTable))
assert(sourceTable.tableType == CatalogTableType.MANAGED)

checkCreateTableLike(sourceTable, targetTable)
checkCreateTableLike(sourceTable, targetTable, "MANAGED")
}
}

test("CREATE [EXTERNAL] TABLE LIKE a data source table LOCATION...") {
  // Exercise the statement both without and with the EXTERNAL keyword. A LOCATION is
  // supplied in both cases, and the target is checked to be an EXTERNAL table each time.
  Seq("", "EXTERNAL").foreach { tblType =>
    withTempDir { tmpDir =>
      val sourceTabName = "tab1"
      val targetTabName = "tab2"
      val basePath = tmpDir.toURI
      withTable(sourceTabName, targetTabName) {
        spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
          .write.format("json").saveAsTable(sourceTabName)
        // Quote the path so it reaches the parser as a SQL string literal.
        sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'")

        val sourceTable =
          spark.sessionState.catalog.getTableMetadata(
            TableIdentifier(sourceTabName, Some("default")))
        val targetTable =
          spark.sessionState.catalog.getTableMetadata(
            TableIdentifier(targetTabName, Some("default")))
        // The source table should be a managed data source table.
        assert(DDLUtils.isDatasourceTable(sourceTable))
        assert(sourceTable.tableType == CatalogTableType.MANAGED)

        checkCreateTableLike(sourceTable, targetTable, "EXTERNAL")
      }
    }
  }
}

Expand All @@ -871,7 +924,38 @@ class HiveDDLSuite
assert(DDLUtils.isDatasourceTable(sourceTable))
assert(sourceTable.tableType == CatalogTableType.EXTERNAL)

checkCreateTableLike(sourceTable, targetTable)
checkCreateTableLike(sourceTable, targetTable, "MANAGED")
}
}
}

test("CREATE [EXTERNAL] TABLE LIKE an external data source table LOCATION...") {
  // Exercise the statement both without and with the EXTERNAL keyword. A LOCATION is
  // supplied in both cases, and the target is checked to be an EXTERNAL table each time.
  Seq("", "EXTERNAL").foreach { tblType =>
    withTempDir { tmpDir =>
      val sourceTabName = "tab1"
      val targetTabName = "tab2"
      val basePath = tmpDir.toURI
      withTable(sourceTabName, targetTabName) {
        withTempPath { dir =>
          val path = dir.getCanonicalPath
          spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
            .write.format("parquet").save(path)
          sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')")
          // Quote the path so it reaches the parser as a SQL string literal, in the same
          // way the PATH option of the source table is quoted above.
          sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'")

          val sourceTable = spark.sessionState.catalog.getTableMetadata(
            TableIdentifier(sourceTabName, Some("default")))
          val targetTable = spark.sessionState.catalog.getTableMetadata(
            TableIdentifier(targetTabName, Some("default")))
          // The source table should be an external data source table.
          assert(DDLUtils.isDatasourceTable(sourceTable))
          assert(sourceTable.tableType == CatalogTableType.EXTERNAL)

          checkCreateTableLike(sourceTable, targetTable, "EXTERNAL")
        }
      }
    }
  }
}
Expand All @@ -889,7 +973,32 @@ class HiveDDLSuite
assert(sourceTable.properties.get("prop1").nonEmpty)
val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))

checkCreateTableLike(sourceTable, targetTable)
checkCreateTableLike(sourceTable, targetTable, "MANAGED")
}
}

test("CREATE [EXTERNAL] TABLE LIKE a managed Hive serde table LOCATION...") {
  val catalog = spark.sessionState.catalog
  // Exercise the statement both without and with the EXTERNAL keyword. A LOCATION is
  // supplied in both cases, and the target is checked to be an EXTERNAL table each time.
  Seq("", "EXTERNAL").foreach { tblType =>
    withTempDir { tmpDir =>
      val sourceTabName = "tab1"
      val targetTabName = "tab2"
      val basePath = tmpDir.toURI
      withTable(sourceTabName, targetTabName) {
        sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'")
        // Quote the path so it reaches the parser as a SQL string literal.
        sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'")

        val sourceTable = catalog.getTableMetadata(
          TableIdentifier(sourceTabName, Some("default")))
        // The source is a managed table that carries a custom table property.
        assert(sourceTable.tableType == CatalogTableType.MANAGED)
        assert(sourceTable.properties.get("prop1").nonEmpty)
        val targetTable = catalog.getTableMetadata(
          TableIdentifier(targetTabName, Some("default")))

        checkCreateTableLike(sourceTable, targetTable, "EXTERNAL")
      }
    }
  }
}

Expand Down Expand Up @@ -923,11 +1032,55 @@ class HiveDDLSuite
assert(sourceTable.comment == Option("Apache Spark"))
val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))

checkCreateTableLike(sourceTable, targetTable)
checkCreateTableLike(sourceTable, targetTable, "MANAGED")
}
}
}

test("CREATE [EXTERNAL] TABLE LIKE an external Hive serde table LOCATION...") {
  val catalog = spark.sessionState.catalog
  // Exercise the statement both without and with the EXTERNAL keyword. A LOCATION is
  // supplied in both cases, and the target is checked to be an EXTERNAL table each time.
  Seq("", "EXTERNAL").foreach { tblType =>
    // tmpDir backs the source table; tmpDir1 is the location given to the target table.
    withTempDir { tmpDir =>
      val basePath = tmpDir.toURI
      withTempDir { tmpDir1 =>
        val basePath1 = tmpDir1.toURI
        val sourceTabName = "tab1"
        val targetTabName = "tab2"
        withTable(sourceTabName, targetTabName) {
          assert(tmpDir.listFiles.isEmpty)
          sql(
            s"""
               |CREATE EXTERNAL TABLE $sourceTabName (key INT comment 'test', value STRING)
               |COMMENT 'Apache Spark'
               |PARTITIONED BY (ds STRING, hr STRING)
               |LOCATION '$basePath'
             """.stripMargin)
          for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) {
            sql(
              s"""
                 |INSERT OVERWRITE TABLE $sourceTabName
                 |partition (ds='$ds',hr='$hr')
                 |SELECT 1, 'a'
               """.stripMargin)
          }
          // Quote the path as a string literal, consistent with the LOCATION clause used
          // to create the source table above.
          sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath1'")

          val sourceTable = catalog.getTableMetadata(
            TableIdentifier(sourceTabName, Some("default")))
          assert(sourceTable.tableType == CatalogTableType.EXTERNAL)
          assert(sourceTable.comment == Option("Apache Spark"))
          val targetTable = catalog.getTableMetadata(
            TableIdentifier(targetTabName, Some("default")))

          checkCreateTableLike(sourceTable, targetTable, "EXTERNAL")
        }
      }
    }
  }
}

test("CREATE TABLE LIKE a view") {
val sourceTabName = "tab1"
val sourceViewName = "view"
Expand All @@ -947,15 +1100,51 @@ class HiveDDLSuite
val targetTable = spark.sessionState.catalog.getTableMetadata(
TableIdentifier(targetTabName, Some("default")))

checkCreateTableLike(sourceView, targetTable)
checkCreateTableLike(sourceView, targetTable, "MANAGED")
}
}
}

test("CREATE [EXTERNAL] TABLE LIKE a view LOCATION...") {
  // Exercise the statement both without and with the EXTERNAL keyword. A LOCATION is
  // supplied in both cases, and the target is checked to be an EXTERNAL table each time.
  Seq("", "EXTERNAL").foreach { tblType =>
    withTempDir { tmpDir =>
      val sourceTabName = "tab1"
      val sourceViewName = "view"
      val targetTabName = "tab2"
      val basePath = tmpDir.toURI
      withTable(sourceTabName, targetTabName) {
        withView(sourceViewName) {
          spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
            .write.format("json").saveAsTable(sourceTabName)
          sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName")
          // Quote the path so it reaches the parser as a SQL string literal.
          sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION '$basePath'")

          val sourceView = spark.sessionState.catalog.getTableMetadata(
            TableIdentifier(sourceViewName, Some("default")))
          // The original source should be a VIEW that carries its view text.
          assert(sourceView.tableType == CatalogTableType.VIEW)
          assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty)
          val targetTable = spark.sessionState.catalog.getTableMetadata(
            TableIdentifier(targetTabName, Some("default")))

          checkCreateTableLike(sourceView, targetTable, "EXTERNAL")
        }
      }
    }
  }
}

private def checkCreateTableLike(sourceTable: CatalogTable, targetTable: CatalogTable): Unit = {
// The created table should be a MANAGED table with empty view text and original text.
assert(targetTable.tableType == CatalogTableType.MANAGED,
"the created table must be a Hive managed table")
private def checkCreateTableLike(
sourceTable: CatalogTable,
targetTable: CatalogTable,
tableType: String): Unit = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not pass in a CatalogTableType instead of a string?

// The created table should be a MANAGED table or EXTERNAL table with empty view text
// and original text.
val expectTableType = CatalogTableType.apply(tableType)
assert(targetTable.tableType == expectTableType,
s"the created table must be a Hive ${expectTableType.name} table")
assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty,
"the view text and original text in the created table must be empty")
assert(targetTable.comment.isEmpty,
Expand Down