Fixes test failures
liancheng committed May 31, 2016
commit fa7b5b6f676553757da96ccd40944ec349a489af
@@ -212,24 +212,37 @@ class SessionCatalog(
    * If no such database is specified, create it in the current database.
    */
   def createTable(tableDefinition: CatalogTable, ignoreIfExists: Boolean): Unit = {
-    val db = formatDatabaseName(tableDefinition.identifier.database.getOrElse(getCurrentDatabase))
-    val table = formatTableName(tableDefinition.identifier.table)
+    val tableId = tableDefinition.identifier
+    val db = formatDatabaseName(tableId.database.getOrElse(getCurrentDatabase))
+    val table = formatTableName(tableId.table)
     val newTableDefinition = tableDefinition.copy(identifier = TableIdentifier(table, Some(db)))
     requireDbExists(db)

-    if (newTableDefinition.tableType == CatalogTableType.EXTERNAL) {
+    if (
+      // If this is an external data source table
+      tableDefinition.properties.contains("spark.sql.sources.provider") &&
+        newTableDefinition.tableType == CatalogTableType.EXTERNAL
+    ) {
       // !! HACK ALERT !!
       //
-      // See https://issues.apache.org/jira/browse/SPARK-15269 for more details about why we have to
-      // set `locationUri` and then remove the directory after creating the external table:
-      val tablePath = defaultTablePath(newTableDefinition.identifier)
+      // Due to a restriction of Hive metastore, here we have to set `locationUri` to a temporary
+      // directory that doesn't exist yet but can definitely be successfully created, and then
+      // delete it right after creating the external data source table. This location will be
+      // persisted to Hive metastore as a standard Hive table location URI, but Spark SQL doesn't
+      // really use it. Also, since we only do this workaround for external tables, deleting the
+      // directory after the fact doesn't do any harm.
+      //
+      // Please refer to https://issues.apache.org/jira/browse/SPARK-15269 for more details.
+
+      val tempPath = new Path(defaultTablePath(tableId), "-__PLACEHOLDER__").toString
+
       try {
         externalCatalog.createTable(
           db,
-          newTableDefinition.withNewStorage(locationUri = Some(tablePath)),
+          newTableDefinition.withNewStorage(locationUri = Some(tempPath)),
           ignoreIfExists)
       } finally {
-        val path = new Path(tablePath)
+        val path = new Path(tempPath)
         FileSystem.get(path.toUri, hadoopConf).delete(path, true)
       }
     } else {
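For context, defaultTablePath (not shown in this diff) derives a table's default location from its database's location URI, so the "-__PLACEHOLDER__" suffix points at a directory that does not exist yet. The following is a rough sketch of that derivation, assuming SessionCatalog helpers of this era (formatDatabaseName, formatTableName, getDatabaseMetadata), not the verbatim implementation:

    import org.apache.hadoop.fs.Path
    import org.apache.spark.sql.catalyst.TableIdentifier

    // Sketch: the default table path lives under the database's location URI.
    // formatDatabaseName, formatTableName, getCurrentDatabase, and
    // getDatabaseMetadata are assumed to be surrounding SessionCatalog members.
    def defaultTablePath(tableIdent: TableIdentifier): String = {
      val dbName = formatDatabaseName(tableIdent.database.getOrElse(getCurrentDatabase))
      val dbLocation = getDatabaseMetadata(dbName).locationUri
      new Path(new Path(dbLocation), formatTableName(tableIdent.table)).toString
    }

Because the placeholder directory never holds real data, deleting it in the finally block is safe, which is exactly what the comment in the hunk above argues.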
@@ -556,9 +556,9 @@ class SparkSession private(
   }


-  /* ------------------------ *
-   |  Catalog-related methods |
-   * ----------------- ------ */
+  /* ------------------------- *
+   |  Catalog-related methods  |
+   * ------------------------- */

   /**
    * Interface through which the user may create, drop, alter or query underlying
@@ -400,7 +400,10 @@ object CreateDataSourceTableUtils extends Logging {
         schema = relation.schema.map { f =>
           CatalogColumn(f.name, f.dataType.catalogString)
         },
-        properties = tableProperties.toMap,
+        // Removes the provider property since we are going to save this table as a Hive
+        // compatible one, and other places use this property to check whether a table is a
+        // data source table (e.g. `DDLUtils.isDatasourceTable`).
+        properties = (tableProperties - "spark.sql.sources.provider").toMap,

Contributor: I see. So the Hive compatible data source table will not be resolved as a data source relation anymore. Correct?

Author (liancheng): Correct.

Author (@liancheng, May 25, 2016): Unfortunately this doesn't work, according to the last Jenkins build failure, because we still want to recognize this table as a data source table when saving data into it using CreateDataSourceTableAsSelectCommand. My last commit tries to fix this by checking .storage.locationUri.isEmpty. This should work because data source tables never set this field.

         viewText = None)
       }
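For reference, the DDLUtils.isDatasourceTable check named in the new comment reduces to the presence of the same provider property that the SessionCatalog hunk above tests directly. A minimal sketch of that check, not the verbatim Spark implementation:

    import org.apache.spark.sql.catalyst.catalog.CatalogTable

    object DDLUtilsSketch {
      // The property key that marks a table as written by a data source;
      // the same key appears in the SessionCatalog condition above.
      private val ProviderKey = "spark.sql.sources.provider"

      def isDatasourceTable(properties: Map[String, String]): Boolean =
        properties.contains(ProviderKey)

      def isDatasourceTable(table: CatalogTable): Boolean =
        isDatasourceTable(table.properties)
    }

Since stripping the property makes this check fail for Hive compatible tables, the author's comment above notes that CreateDataSourceTableAsSelectCommand instead recognizes such tables by checking storage.locationUri.isEmpty, a field data source tables never set.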
@@ -377,7 +377,7 @@ private[hive] class HiveClientImpl(
           // allows directory paths as location URIs while Spark SQL data source tables also
           // allow file paths. So the standard Hive `dataLocation` is meaningless for Spark SQL
           // data source tables.
-          DDLUtils.isDatasourceTable(properties)
+          DDLUtils.isDatasourceTable(properties) && h.getTableType == HiveTableType.EXTERNAL_TABLE
         },

Contributor: Why do we need this check?

Author (liancheng): Because we have to store the placeholder location URI in the metastore for external data source tables, and I'd like to avoid exposing it to user space.

Author (@liancheng, May 31, 2016): Technically, it does no harm to keep this field, since Spark SQL doesn't use it. But this placeholder location URI doesn't make sense anywhere, and it can be error-prone to keep it in the locationUri field, since future maintainers may think it is the real data location.

           inputFormat = Option(h.getInputFormatClass).map(_.getName),
           outputFormat = Option(h.getOutputFormatClass).map(_.getName),
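Judging from the surrounding lines, this predicate is evaluated inside the block that decides whether the location reported by Hive becomes the table's locationUri; when it holds, the placeholder URI is filtered out before it can reach user space, which matches the author's comments above. A hypothetical, self-contained illustration of that filtering behavior (the function name and parameters are assumptions, not the actual HiveClientImpl code):

    // Hypothetical helper: suppress the raw Hive location for external data
    // source tables so the "-__PLACEHOLDER__" URI is never surfaced.
    def visibleLocationUri(
        rawLocation: Option[String],
        isDatasourceTable: Boolean,
        isExternalTable: Boolean): Option[String] =
      rawLocation.filterNot(_ => isDatasourceTable && isExternalTable)

    // visibleLocationUri(Some("/warehouse/t/-__PLACEHOLDER__"), true, true)
    //   returns None: the placeholder stays hidden.
    // visibleLocationUri(Some("/warehouse/t"), false, true)
    //   returns Some("/warehouse/t"): a plain external Hive table keeps its location.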