Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ statement
(AS? query)? #createTable
| createTableHeader ('(' columns=colTypeList ')')?
((COMMENT comment=STRING) |
(PARTITIONED BY '(' partitionColumns=colTypeList ')') |
(PARTITIONED BY '(' partitionColumns=colTypeList ')' |
PARTITIONED BY partitionColumnNames=identifierList) |
bucketSpec |
skewSpec |
rowFormat |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1196,33 +1196,40 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {

selectQuery match {
case Some(q) =>
// Hive does not allow to use a CTAS statement to create a partitioned table.
if (tableDesc.partitionColumnNames.nonEmpty) {
val errorMessage = "A Create Table As Select (CTAS) statement is not allowed to " +
"create a partitioned table using Hive's file formats. " +
"Please use the syntax of \"CREATE TABLE tableName USING dataSource " +
"OPTIONS (...) PARTITIONED BY ...\" to create a partitioned table through a " +
"CTAS statement."
operationNotAllowed(errorMessage, ctx)
}

// Don't allow explicit specification of schema for CTAS.
if (schema.nonEmpty) {
if (dataCols.nonEmpty) {
operationNotAllowed(
"Schema may not be specified in a Create Table As Select (CTAS) statement",
ctx)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this check should go first.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, because val schema = StructType(dataCols ++ partitionCols) is defined, if this check goes first, it will shadow the next check if (tableDesc.partitionColumnNames.nonEmpty).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then can we check dataCols directly?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. That's good.

}

// When creating partitioned table with CTAS statement, we can't specify data type for the
// partition columns.
if (partitionCols.nonEmpty) {
Copy link
Member Author

@viirya viirya Dec 26, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed the check of tableDesc.partitionColumnNames to partitionCols. They are the same effect here, but partitionCols is more accurate and less confusing.

val errorMessage = "Create Partitioned Table As Select cannot specify data type for " +
"the partition columns of the target table."
operationNotAllowed(errorMessage, ctx)
}

// Hive CTAS supports dynamic partition by specifying partition column names.
val partitionColumnNames =
Option(ctx.partitionColumnNames)
.map(visitIdentifierList(_).toArray)
.getOrElse(Array.empty[String])

val tableDescWithPartitionColNames =
tableDesc.copy(partitionColumnNames = partitionColumnNames)

val hasStorageProperties = (ctx.createFileFormat.size != 0) || (ctx.rowFormat.size != 0)
if (conf.convertCTAS && !hasStorageProperties) {
// At here, both rowStorage.serdeProperties and fileStorage.serdeProperties
// are empty Maps.
val newTableDesc = tableDesc.copy(
val newTableDesc = tableDescWithPartitionColNames.copy(
storage = CatalogStorageFormat.empty.copy(locationUri = locUri),
provider = Some(conf.defaultDataSourceName))
CreateTable(newTableDesc, mode, Some(q))
} else {
CreateTable(tableDesc, mode, Some(q))
CreateTable(tableDescWithPartitionColNames, mode, Some(q))
}
case None => CreateTable(tableDesc, mode, None)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package org.apache.spark.sql.hive.execution

import java.io.File
import java.net.URI
import java.util.Date

import scala.language.existentials

Expand All @@ -33,6 +32,7 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.HiveExternalCatalog
Expand Down Expand Up @@ -2370,4 +2370,50 @@ class HiveDDLSuite
))
}
}

test("Hive CTAS can't create partitioned table by specifying schema") {
val err1 = intercept[ParseException] {
spark.sql(
s"""
|CREATE TABLE t (a int)
|PARTITIONED BY (b string)
|STORED AS parquet
|AS SELECT 1 as a, "a" as b
""".stripMargin)
}.getMessage
assert(err1.contains("Schema may not be specified in a Create Table As Select " +
"(CTAS) statement"))

val err2 = intercept[ParseException] {
spark.sql(
s"""
|CREATE TABLE t
|PARTITIONED BY (b string)
|STORED AS parquet
|AS SELECT 1 as a, "a" as b
""".stripMargin)
}.getMessage
assert(err2.contains("Create Partitioned Table As Select cannot specify data type for " +
"the partition columns of the target table"))
}

test("Hive CTAS with dynamic partition") {
Seq("orc", "parquet").foreach { format =>
withTable("t") {
withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") {
spark.sql(
s"""
|CREATE TABLE t
|PARTITIONED BY (b)
|STORED AS $format
|AS SELECT 1 as a, "a" as b
""".stripMargin)
checkAnswer(spark.table("t"), Row(1, "a"))

assert(spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
.partitionColumnNames === Seq("b"))
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -692,8 +692,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|AS SELECT key, value FROM mytable1
""".stripMargin)
}.getMessage
assert(e.contains("A Create Table As Select (CTAS) statement is not allowed to " +
"create a partitioned table using Hive's file formats"))
assert(e.contains("Create Partitioned Table As Select cannot specify data type for " +
"the partition columns of the target table"))
}
}
}
Expand Down