Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
52ca902
alter_add_col: initial changes
xwu0226 Nov 21, 2016
f498fa6
add testcases
xwu0226 Dec 1, 2016
522443e
negative testcases
xwu0226 Dec 1, 2016
1af2654
remove non-support testcase
xwu0226 Dec 5, 2016
ec57ee9
fix testcase
xwu0226 Dec 5, 2016
ec74849
update testcases
xwu0226 Dec 7, 2016
8fca889
update testcases
xwu0226 Dec 7, 2016
4a17529
update testcases
xwu0226 Jan 13, 2017
9699128
comments for command caseclass
xwu0226 Jan 20, 2017
9860e5c
update comments based on review
xwu0226 Jan 21, 2017
dfff364
SPARK-19261: update to support datasource table and add new testcases
xwu0226 Feb 3, 2017
9f23254
remove workaround for parquet issue since parquet-1.8.2 is now supported
xwu0226 Feb 4, 2017
180092f
SPARK-19261: using white list for datasource table types that support…
xwu0226 Feb 7, 2017
5a8aa80
fix code style
xwu0226 Feb 7, 2017
d3860e6
fix coding style
xwu0226 Feb 7, 2017
55577aa
update upon review
xwu0226 Feb 24, 2017
6fa913a
refactor code from alterTable function
xwu0226 Feb 25, 2017
7231efe
rebase and resolve conflict
xwu0226 Mar 6, 2017
e4e9ecf
resolve conflicts
xwu0226 Mar 9, 2017
75e7441
using ExternalCatalog.alterTableSchema
xwu0226 Mar 14, 2017
9847030
add InMemoryCatalog testcases
xwu0226 Mar 15, 2017
1a383bb
revert change in HiveExternalCatalog.scala
xwu0226 Mar 15, 2017
f994ce9
update upon review
xwu0226 Mar 16, 2017
5bf7360
add checking for duplicate column names
xwu0226 Mar 16, 2017
599c45e
add case sensitivity for duplicate name checking and new testcases
xwu0226 Mar 16, 2017
b3edfea
typo
xwu0226 Mar 16, 2017
7d8a515
resolve conflicts and modify testcases
xwu0226 Mar 17, 2017
e895278
update testcases
xwu0226 Mar 17, 2017
e171ac4
move checkduplicate and schema arrangement to SessionCatalog.alterTab…
xwu0226 Mar 17, 2017
4391edd
change SessionCatalog.alterTableAddColumn back to alterTableSchema
xwu0226 Mar 18, 2017
a3fef12
update upon review comments
xwu0226 Mar 18, 2017
1eb7cd3
some minor updates upon review comments
xwu0226 Mar 19, 2017
04ce8f4
update based on review
xwu0226 Mar 21, 2017
7d8437d
update on minor comments
xwu0226 Mar 21, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
SPARK-19261: using white list for datasource table types that support…
…s alter add columns
  • Loading branch information
xwu0226 committed Mar 19, 2017
commit 180092f038a5c7957633a799c010cda17a2eea60
Original file line number Diff line number Diff line change
Expand Up @@ -764,9 +764,7 @@ object DDLUtils {
val HIVE_PROVIDER = "hive"

// Decides whether `table` counts as a Hive table by inspecting `table.provider`.
// NOTE(review): this hunk carries two consecutive boolean expressions (the `||` form and the
// `&&` form); they look like the before/after lines of the diff with the +/- markers lost.
// As written, only the last expression is the method's result -- confirm against the repo.
def isHiveTable(table: CatalogTable): Boolean = {
// When `CatalogTable` is directly fetched from the catalog,
// CatalogTable.provider = None means the table is a Hive serde table.
// This form is true when the provider is absent or equals HIVE_PROVIDER ignoring case.
!table.provider.isDefined || table.provider.get.toLowerCase == HIVE_PROVIDER
// This form is true only when the provider is present and equals HIVE_PROVIDER ignoring case.
table.provider.isDefined && table.provider.get.toLowerCase == HIVE_PROVIDER
}

def isDatasourceTable(table: CatalogTable): Boolean = {
Expand Down Expand Up @@ -817,50 +815,4 @@ object DDLUtils {
}
}
}

/**
* Checks that `table` is eligible for ALTER TABLE ADD COLUMNS and returns its catalog metadata.
*
* The command is rejected for temporary tables/views, for views, and for datasource tables
* whose provider is text, ORC, or a source whose instance is not a `FileFormat`.
*
* @param catalog session catalog used for the temporary-table check and the metadata lookup
* @param table identifier of the table being altered
* @return the metadata obtained from `catalog.getTableMetadata(table)`
* @throws AnalysisException if any of the checks above rejects the table
*/
def verifyAlterTableAddColumn(
catalog: SessionCatalog,
table: TableIdentifier): CatalogTable = {
// The temporary-table check runs before the metadata lookup below.
if (catalog.isTemporaryTable(table)) {
throw new AnalysisException(
s"${table.toString} is a temporary VIEW, which does not support ALTER ADD COLUMNS.")
}

val catalogTable = catalog.getTableMetadata(table)
if (catalogTable.tableType == CatalogTableType.VIEW) {
throw new AnalysisException(
s"${table.toString} is a VIEW, which does not support ALTER ADD COLUMNS.")
}

if (isDatasourceTable(catalogTable)) {
// NOTE(review): `provider.get` presumably cannot fail here because isDatasourceTable
// implies a defined provider -- confirm against isDatasourceTable's body.
catalogTable.provider.get match {
case provider if provider.toLowerCase == "text" =>
// TextFileFormat can not support adding column either because text datasource table
// is resolved as a single-column table only.
throw new AnalysisException(
s"""${table.toString} is a text format datasource table,
|which does not support ALTER ADD COLUMNS.""".stripMargin)
// The short name "orc" is compared case-insensitively; the package-prefix check uses the
// provider string exactly as stored.
case provider if provider.toLowerCase == "orc"
|| provider.startsWith("org.apache.spark.sql.hive.orc") =>
// TODO Current native orc reader can not handle the difference between
// user-specified schema and inferred schema from ORC data file yet.
throw new AnalysisException(
s"""${table.toString} is an ORC datasource table,
|which does not support ALTER ADD COLUMNS.""".stripMargin)
// Instantiates the class returned by DataSource.lookupDataSource to test whether the
// provider is a FileFormat.
case provider
if (!DataSource.lookupDataSource(provider).newInstance().isInstanceOf[FileFormat]) =>
// For datasource table, we only support HadoopFsRelation
throw new AnalysisException(
s"""${table.toString} is a datasource table with external provider,
|which does not support ALTER ADD COLUMNS.""".stripMargin)
// Any remaining provider passes the check.
case _ =>
}
}

catalogTable
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTableType._
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.util.quoteIdentifier
import org.apache.spark.sql.execution.datasources.PartitioningUtils
import org.apache.spark.sql.execution.datasources.{DataSource, FileFormat, PartitioningUtils}
import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
import org.apache.spark.sql.execution.datasources.json.JsonFileFormat
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils

Expand Down Expand Up @@ -187,7 +190,7 @@ case class AlterTableAddColumnsCommand(
columns: Seq[StructField]) extends RunnableCommand {
override def run(sparkSession: SparkSession): Seq[Row] = {
val catalog = sparkSession.sessionState.catalog
val catalogTable = DDLUtils.verifyAlterTableAddColumn(catalog, table)
val catalogTable = verifyAlterTableAddColumn(catalog, table)

// If an exception is thrown here we can just assume the table is uncached;
// this can happen with Hive tables when the underlying catalog is in-memory.
Expand All @@ -210,6 +213,41 @@ case class AlterTableAddColumnsCommand(

Seq.empty[Row]
}

/**
* Checks that `table` is eligible for ALTER TABLE ADD COLUMNS and returns its catalog metadata.
*
* Views are rejected, and a datasource table is accepted only when its provider resolves to
* the JSON, CSV, or Parquet file format.
* NOTE(review): temporary views are presumably reported with table type VIEW by
* `getTempViewOrPermanentTableMetadata`, which is how they get rejected -- confirm.
*
* @param catalog session catalog used for the metadata lookup
* @param table identifier of the table being altered
* @return the metadata obtained from `catalog.getTempViewOrPermanentTableMetadata(table)`
* @throws AnalysisException if the table is a view or an unsupported datasource table
*/
private def verifyAlterTableAddColumn(
catalog: SessionCatalog,
table: TableIdentifier): CatalogTable = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indent

val catalogTable = catalog.getTempViewOrPermanentTableMetadata(table)

if (catalogTable.tableType == CatalogTableType.VIEW) {
throw new AnalysisException(
s"${table.toString} is a VIEW, which does not support ALTER ADD COLUMNS.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about?

ALTER ADD COLUMNS does not support views. You must drop and re-create the views for adding the new columns. Views: $table.

}

if (DDLUtils.isDatasourceTable(catalogTable)) {
// The provider class is instantiated so the match below can test its concrete type.
DataSource.lookupDataSource(catalogTable.provider.get).newInstance() match {
// For datasource tables, this command only supports the file formats matched below.
// TextFileFormat always defaults to a single column named "value".
// OrcFileFormat cannot yet handle a difference between the user-specified schema and
// the inferred schema. TODO: once that issue is resolved, ORC can be added back.
// The Hive type is already treated as a Hive serde table, so the logic will not
// come in here.
case _: JsonFileFormat =>
case _: CSVFileFormat =>
case _: ParquetFileFormat =>
// Any other instance is rejected; `$s` in the message is that instance's string form.
case s =>
throw new AnalysisException(
s"""${table.toString} is a datasource table with type $s,
|which does not support ALTER ADD COLUMNS.""".stripMargin)
}
}

catalogTable
}
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2001,17 +2001,17 @@ class HiveDDLSuite
}
}

test("alter datasource table add columns - orc format not supported") {
Seq("orc", "ORC", "org.apache.spark.sql.hive.orc",
"org.apache.spark.sql.hive.orc.DefaultSource").foreach { source =>
withTable("alter_add_ds_text") {
sql(s"CREATE TABLE alter_add_ds_text (c1 int) USING $source")
val e = intercept[AnalysisException] {
sql("ALTER TABLE alter_add_ds_text ADD COLUMNS (c2 int)")
}.getMessage
assert(e.contains("does not support ALTER ADD COLUMNS"))
}
Seq("orc", "ORC", "org.apache.spark.sql.hive.orc",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's remove it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. will do.

"org.apache.spark.sql.hive.orc.DefaultSource").foreach { source =>
test(s"alter datasource table add columns - $source format not supported") {
withTable("alter_add_ds_text") {
sql(s"CREATE TABLE alter_add_ds_text (c1 int) USING $source")
val e = intercept[AnalysisException] {
sql("ALTER TABLE alter_add_ds_text ADD COLUMNS (c2 int)")
}.getMessage
assert(e.contains("does not support ALTER ADD COLUMNS"))
}
}
}

test("alter table add columns -- not support temp view") {
Expand All @@ -2020,7 +2020,7 @@ class HiveDDLSuite
val e = intercept[AnalysisException] {
sql("alter table tmp_v add columns (c3 int)")
}
assert(e.message.contains("is a temporary VIEW, which does not support ALTER ADD COLUMNS"))
assert(e.message.contains("is a VIEW, which does not support ALTER ADD COLUMNS"))
}
}

Expand Down