SPARK-19261: use a whitelist for datasource table types that support ALTER TABLE ADD COLUMNS
apache · xwu0226 · Nov 21, 2016 · Dec 1, 2016 · Dec 1, 2016 · Dec 5, 2016
commit 180092f038a5c7957633a799c010cda17a2eea60
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -764,9 +764,7 @@ object DDLUtils {
   val HIVE_PROVIDER = "hive"
 
   def isHiveTable(table: CatalogTable): Boolean = {
-    // When `CatalogTable` is directly fetched from the catalog,
-    // CatalogTable.provider = None means the table is a Hive serde table.
-    !table.provider.isDefined || table.provider.get.toLowerCase == HIVE_PROVIDER
+    table.provider.isDefined && table.provider.get.toLowerCase == HIVE_PROVIDER
   }
 
   def isDatasourceTable(table: CatalogTable): Boolean = {
@@ -817,50 +815,4 @@ object DDLUtils {
       }
     }
   }
-
-  /**
-   * ALTER TABLE ADD COLUMNS command does not support temporary view/table,
-   * view, or datasource table with text, orc formats or external provider.
-   */
-  def verifyAlterTableAddColumn(
-      catalog: SessionCatalog,
-      table: TableIdentifier): CatalogTable = {
-    if (catalog.isTemporaryTable(table)) {
-      throw new AnalysisException(
-        s"${table.toString} is a temporary VIEW, which does not support ALTER ADD COLUMNS.")
-    }
-
-    val catalogTable = catalog.getTableMetadata(table)
-    if (catalogTable.tableType == CatalogTableType.VIEW) {
-      throw new AnalysisException(
-        s"${table.toString} is a VIEW, which does not support ALTER ADD COLUMNS.")
-    }
-
-    if (isDatasourceTable(catalogTable)) {
-      catalogTable.provider.get match {
-        case provider if provider.toLowerCase == "text" =>
-          // TextFileFormat can not support adding column either because text datasource table
-          // is resolved as a single-column table only.
-          throw new AnalysisException(
-            s"""${table.toString} is a text format datasource table,
-               |which does not support ALTER ADD COLUMNS.""".stripMargin)
-        case provider if provider.toLowerCase == "orc"
-          || provider.startsWith("org.apache.spark.sql.hive.orc") =>
-          // TODO Current native orc reader can not handle the difference between
-          // user-specified schema and inferred schema from ORC data file yet.
-          throw new AnalysisException(
-            s"""${table.toString} is an ORC datasource table,
-               |which does not support ALTER ADD COLUMNS.""".stripMargin)
-        case provider
-          if (!DataSource.lookupDataSource(provider).newInstance().isInstanceOf[FileFormat]) =>
-          // For datasource table, we only support HadoopFsRelation
-          throw new AnalysisException(
-            s"""${table.toString} is a datasource table with external provider,
-               |which does not support ALTER ADD COLUMNS.""".stripMargin)
-        case _ =>
-      }
-    }
-
-    catalogTable
-  }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -37,7 +37,10 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTableType._
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
 import org.apache.spark.sql.catalyst.util.quoteIdentifier
-import org.apache.spark.sql.execution.datasources.PartitioningUtils
+import org.apache.spark.sql.execution.datasources.{DataSource, FileFormat, PartitioningUtils}
+import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
+import org.apache.spark.sql.execution.datasources.json.JsonFileFormat
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 
@@ -187,7 +190,7 @@ case class AlterTableAddColumnsCommand(
     columns: Seq[StructField]) extends RunnableCommand {
   override def run(sparkSession: SparkSession): Seq[Row] = {
     val catalog = sparkSession.sessionState.catalog
-    val catalogTable = DDLUtils.verifyAlterTableAddColumn(catalog, table)
+    val catalogTable = verifyAlterTableAddColumn(catalog, table)
 
     // If an exception is thrown here we can just assume the table is uncached;
     // this can happen with Hive tables when the underlying catalog is in-memory.
@@ -210,6 +213,41 @@ case class AlterTableAddColumnsCommand(
 
     Seq.empty[Row]
   }
+
+  /**
+   * ALTER TABLE ADD COLUMNS command does not support views (temporary or permanent),
+   * or datasource tables whose file format is not one of JSON, CSV or Parquet.
+   */
+  private def verifyAlterTableAddColumn(
+    catalog: SessionCatalog,
+    table: TableIdentifier): CatalogTable = {
+    val catalogTable = catalog.getTempViewOrPermanentTableMetadata(table)
+
+    if (catalogTable.tableType == CatalogTableType.VIEW) {
+      throw new AnalysisException(
+        s"${table.toString} is a VIEW, which does not support ALTER ADD COLUMNS.")
+    }
+
+    if (DDLUtils.isDatasourceTable(catalogTable)) {
+      DataSource.lookupDataSource(catalogTable.provider.get).newInstance() match {
+        // For datasource tables, this command only supports the file formats listed below.
+        // TextFileFormat always resolves to a single column named "value".
+        // OrcFileFormat cannot yet handle differences between the user-specified schema and
+        // the inferred schema. TODO: once this issue is resolved, we can add ORC back.
+        // Tables with the Hive provider are already treated as Hive serde tables, so the
+        // logic will not come in here for them.
+        case _: JsonFileFormat =>
+        case _: CSVFileFormat =>
+        case _: ParquetFileFormat =>
+        case s =>
+          throw new AnalysisException(
+            s"""${table.toString} is a datasource table with type $s,
+               |which does not support ALTER ADD COLUMNS.""".stripMargin)
+      }
+    }
+
+    catalogTable
+  }
 }
 
 

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2001,17 +2001,17 @@ class HiveDDLSuite
     }
   }
 
-  test("alter datasource table add columns - orc format not supported") {
-    Seq("orc", "ORC", "org.apache.spark.sql.hive.orc",
-      "org.apache.spark.sql.hive.orc.DefaultSource").foreach { source =>
-        withTable("alter_add_ds_text") {
-          sql(s"CREATE TABLE alter_add_ds_text (c1 int) USING $source")
-          val e = intercept[AnalysisException] {
-            sql("ALTER TABLE alter_add_ds_text ADD COLUMNS (c2 int)")
-          }.getMessage
-          assert(e.contains("does not support ALTER ADD COLUMNS"))
-        }
+  Seq("orc", "ORC", "org.apache.spark.sql.hive.orc",
+    "org.apache.spark.sql.hive.orc.DefaultSource").foreach { source =>
+    test(s"alter datasource table add columns - $source format not supported") {
+      withTable("alter_add_ds_text") {
+        sql(s"CREATE TABLE alter_add_ds_text (c1 int) USING $source")
+        val e = intercept[AnalysisException] {
+          sql("ALTER TABLE alter_add_ds_text ADD COLUMNS (c2 int)")
+        }.getMessage
+        assert(e.contains("does not support ALTER ADD COLUMNS"))
       }
+    }
   }
 
   test("alter table add columns -- not support temp view") {
@@ -2020,7 +2020,7 @@ class HiveDDLSuite
       val e = intercept[AnalysisException] {
         sql("alter table tmp_v add columns (c3 int)")
       }
-      assert(e.message.contains("is a temporary VIEW, which does not support ALTER ADD COLUMNS"))
+      assert(e.message.contains("is a VIEW, which does not support ALTER ADD COLUMNS"))
     }
   }