HiveTableScanExec.scala
@@ -20,13 +20,14 @@ package org.apache.spark.sql.hive.execution
import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition}
import org.apache.hadoop.hive.serde.serdeConstants
import org.apache.hadoop.hive.serde2.objectinspector._
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils

-import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.{EmptyRDD, RDD}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
@@ -139,6 +140,13 @@ case class HiveTableScanExec(
}

protected override def doExecute(): RDD[InternalRow] = {
val locationPath = new Path(relation.catalogTable.location)
val fs = locationPath.getFileSystem(sparkSession.sessionState.newHadoopConf())

// If the table location does not exist, return an empty RDD.
if (!fs.exists(locationPath)) {
return new EmptyRDD[InternalRow](sparkSession.sparkContext)
Member:
Can we do it in makeRDDForTable?

Contributor Author:
I do it here so that it covers both non-partitioned and partitioned tables. A partitioned table already runs fine when the location does not exist as long as verifyPartitionPath is set to true; if we also want it to run fine when verifyPartitionPath is set to false, we would have to add the same check to makeRDDForPartitionedTable as well. Doing it here handles both cases in one place.

Contributor Author:
On second thought, it does not seem reasonable to do this for a partitioned table, because the actual partition paths may not live under the table's location. I have moved this logic into makeRDDForTable.
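
For reference, the resulting shape is roughly the following (an illustrative sketch against HadoopTableReader, not the exact patch; hiveTable.getPath, hadoopConf, and the surrounding plumbing are assumptions):

// Guard in makeRDDForTable: this method only handles non-partitioned tables,
// so the check never fires for partition paths.
override def makeRDDForTable(hiveTable: HiveTable): RDD[InternalRow] = {
  val tablePath = hiveTable.getPath
  val fs = tablePath.getFileSystem(hadoopConf)
  if (!fs.exists(tablePath)) {
    return new EmptyRDD[InternalRow](sparkSession.sparkContext)
  }
  // ... existing scan logic unchanged ...
}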

}
// Using dummyCallSite, as getCallSite can turn out to be expensive
// with multiple partitions.
val rdd = if (!relation.hiveQlTable.isPartitioned) {
HiveDDLSuite.scala
@@ -35,6 +35,7 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.Utils

class HiveDDLSuite
extends QueryTest with SQLTestUtils with TestHiveSingleton with BeforeAndAfterEach {
@@ -1494,4 +1495,149 @@ class HiveDDLSuite
}
}
}

test("insert data to a hive serde table which has a not existed location should succeed") {
Contributor:
nit: non-existing location

withTable("t") {
withTempDir { dir =>
spark.sql(
s"""
|CREATE TABLE t(a string, b int)
|USING hive
|OPTIONS(path "file:${dir.getCanonicalPath}")
""".stripMargin)
val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
val expectedPath = s"file:${dir.getAbsolutePath.stripSuffix("/")}"
assert(table.location.stripSuffix("/") == expectedPath)
Contributor:
Please convert to Path and compare.
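
For instance, something like this one-liner (illustrative; it assumes org.apache.hadoop.fs.Path is imported and relies on Path normalizing away trailing slashes, which should make the stripSuffix("/") calls unnecessary):

assert(new Path(table.location) == new Path(expectedPath))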


val tableLocFile = new File(table.location.stripPrefix("file:"))
Contributor:
How about new File(new URI(table.location))? Please avoid .stripPrefix("file:"), which looks very hacky.
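
That is, roughly (illustrative; assumes an added import java.net.URI, and that table.location is still a String here, as the surrounding assertions suggest):

val tableLocFile = new File(new URI(table.location))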

tableLocFile.delete()
assert(!tableLocFile.exists())
spark.sql("INSERT INTO TABLE t SELECT 'c', 1")
assert(tableLocFile.exists())
checkAnswer(spark.table("t"), Row("c", 1) :: Nil)

Utils.deleteRecursively(dir)
assert(!tableLocFile.exists())
spark.sql("INSERT OVERWRITE TABLE t SELECT 'c', 1")
assert(tableLocFile.exists())
checkAnswer(spark.table("t"), Row("c", 1) :: Nil)

val newDir = dir.getAbsolutePath.stripSuffix("/") + "/x"
val newDirFile = new File(newDir)
spark.sql(s"ALTER TABLE t SET LOCATION '$newDir'")
spark.sessionState.catalog.refreshTable(TableIdentifier("t"))

val table1 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
assert(table1.location == newDir)
assert(!newDirFile.exists())

spark.sql("INSERT INTO TABLE t SELECT 'c', 1")
checkAnswer(spark.table("t"), Row("c", 1) :: Nil)
assert(newDirFile.exists())
}
}
}

test("insert into a hive serde table with no existed partition location should succeed") {
withTable("t") {
withTempDir { dir =>
spark.sql(
s"""
|CREATE TABLE t(a int, b int, c int, d int)
|USING hive
|PARTITIONED BY(a, b)
|LOCATION "file:${dir.getCanonicalPath}"
""".stripMargin)
val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
val expectedPath = s"file:${dir.getAbsolutePath.stripSuffix("/")}"
assert(table.location.stripSuffix("/") == expectedPath)

spark.sql("INSERT INTO TABLE t PARTITION(a=1, b=2) SELECT 3, 4")
checkAnswer(spark.table("t"), Row(3, 4, 1, 2) :: Nil)

val partLoc = new File(s"${dir.getAbsolutePath}/a=1")
Utils.deleteRecursively(partLoc)
assert(!partLoc.exists())
// Insert overwrite into a partition whose location has been deleted.
spark.sql("INSERT OVERWRITE TABLE t PARTITION(a=1, b=2) SELECT 7, 8")
assert(partLoc.exists())
checkAnswer(spark.table("t"), Row(7, 8, 1, 2) :: Nil)

val newDir = dir.getAbsolutePath.stripSuffix("/") + "/x"
val newDirFile = new File(newDir)
spark.sql(s"ALTER TABLE t PARTITION(a=1, b=2) SET LOCATION '$newDir'")
assert(!newDirFile.exists())

// Insert into a partition whose location does not exist.
spark.sql("INSERT INTO TABLE t PARTITION(a=1, b=2) SELECT 9, 10")
assert(newDirFile.exists())
checkAnswer(spark.table("t"), Row(9, 10, 1, 2) :: Nil)
}
}
}

test("read data from a hive serde table which has a not existed location should succeed") {
Member:
This is the only test case that fails without this fix, right?

Contributor Author:
Yes, it is.

withTable("t") {
withTempDir { dir =>
spark.sql(
s"""
|CREATE TABLE t(a string, b int)
|USING hive
|OPTIONS(path "file:${dir.getAbsolutePath}")
Member (@gatorsmile, Feb 20, 2017):
Indent issues.

""".stripMargin)
val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
val expectedPath = s"file:${dir.getAbsolutePath.stripSuffix("/")}"
assert(table.location.stripSuffix("/") == expectedPath)

dir.delete()
checkAnswer(spark.table("t"), Nil)

val newDir = dir.getAbsolutePath.stripSuffix("/") + "/x"
Member:
Use new File(dir, "x") instead.

spark.sql(s"ALTER TABLE t SET LOCATION '$newDir'")

val table1 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
assert(table1.location == newDir)
assert(!new File(newDir).exists())
checkAnswer(spark.table("t"), Nil)
}
}
}

test("read data from a hive serde table with no existed partition location should succeed") {
withTable("t") {
withTempDir { dir =>
spark.sql(
s"""
|CREATE TABLE t(a int, b int, c int, d int)
|USING hive
|PARTITIONED BY(a, b)
|LOCATION "file:${dir.getCanonicalPath}"
""".stripMargin)
val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
Member:
Is this variable unused?


spark.sql("INSERT INTO TABLE t PARTITION(a=1, b=2) SELECT 3, 4")
checkAnswer(spark.table("t"), Row(3, 4, 1, 2) :: Nil)

val newDir = dir.getAbsolutePath.stripSuffix("/") + "/x"
val newDirFile = new File(newDir)
spark.sql(s"ALTER TABLE t PARTITION(a=1, b=2) SET LOCATION '$newDir'")
assert(!newDirFile.exists())
// Select from a partition whose location has been changed to a non-existent path.
withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "true") {
Contributor:
Why set this conf?

Contributor Author:
If we don't set it, the query throws an exception. If we do set it, Spark checks whether each partition path exists and, instead of throwing, just returns an empty RDD when the path does not exist.
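
Conceptually, when spark.sql.hive.verifyPartitionPath is true the verification amounts to something like this (a simplified sketch, not the actual HadoopTableReader code; the method name, hadoopConf, and the surrounding plumbing are assumptions):

// Keep only partitions whose data paths exist on the filesystem, so that a
// missing path yields an empty scan instead of an "input path does not
// exist" exception.
def prunePartitionsByPath(partitions: Seq[HivePartition]): Seq[HivePartition] = {
  partitions.filter { p =>
    val dataPath = p.getDataLocation
    dataPath.getFileSystem(hadoopConf).exists(dataPath)
  }
}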

Contributor:
Is this expected? I think Hive always returns an empty result, right?

Contributor:
BTW this conf will be removed soon, as it has bugs.

Contributor Author:
OK, thanks. Then do we also need to modify something here?

Contributor Author:
Yes, Hive returns an empty result. If there is a bug here (could you describe what the bug is?), can we remove the conf and always return the result?

checkAnswer(spark.sql("select * from t where a=1 and b=2"), Nil)
}

spark.sql("INSERT INTO TABLE t PARTITION(a=1, b=2) SELECT 5, 6")
checkAnswer(spark.table("t"), Row(5, 6, 1, 2) :: Nil)
assert(newDirFile.exists())

// Select from a partition whose location has been deleted.
Utils.deleteRecursively(newDirFile)
assert(!newDirFile.exists())
withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "true") {
checkAnswer(spark.sql("select * from t where a=1 and b=2"), Nil)
}
}
}
}
}