Changes from 1 commit (of 31 commits)
aebdfc6
[SPARK-19667][SQL]create table with hiveenabled in default database u…
windpiger Feb 20, 2017
825c0ad
rename a conf name
windpiger Feb 20, 2017
a2c9168
fix test faile
windpiger Feb 21, 2017
bacd528
process default database location when create/get database from metas…
windpiger Feb 22, 2017
3f6e061
remove an redundant line
windpiger Feb 22, 2017
96dcc7d
fix empty string location of database
windpiger Feb 22, 2017
f329387
modify the test case
windpiger Feb 22, 2017
83dba73
Merge branch 'master' into defaultDBPathInHive
windpiger Feb 22, 2017
58a0020
fix test failed
windpiger Feb 22, 2017
1dce2d7
add log to find out why jenkins failed
windpiger Feb 22, 2017
12f81d3
add scalastyle:off for println
windpiger Feb 22, 2017
56e83d5
fix test faile
windpiger Feb 22, 2017
901bb1c
make warehouse path qualified for default database
windpiger Feb 23, 2017
99d9746
remove a string s
windpiger Feb 23, 2017
db555e3
modify a comment
windpiger Feb 23, 2017
d327994
fix test failed
windpiger Feb 23, 2017
73c8802
move to sessioncatalog
windpiger Feb 23, 2017
747b31a
remove import
windpiger Feb 23, 2017
8f8063f
remove an import
windpiger Feb 23, 2017
4dc11c1
modify some codestyle and some comment
windpiger Feb 24, 2017
9c0773b
Merge branch 'defaultDBPathInHive' of github.com:windpiger/spark into…
windpiger Feb 24, 2017
80b8133
mv defaultdb path logic to ExternalCatalog
windpiger Feb 27, 2017
41ea115
modify a comment
windpiger Feb 27, 2017
13245e4
modify a comment
windpiger Feb 27, 2017
096ae63
add final def
windpiger Mar 1, 2017
badd61b
modify some code
windpiger Mar 2, 2017
35d2b59
add lazy flag
windpiger Mar 2, 2017
e3a467e
modify test case
windpiger Mar 3, 2017
ae9938a
modify test case
windpiger Mar 3, 2017
7739ccd
mv getdatabase
windpiger Mar 3, 2017
f93f5d3
merge with master
windpiger Mar 8, 2017
Commit 901bb1c37ae510008b51d260750b96b88def93f8
make warehouse path qualified for default database
windpiger committed Feb 23, 2017
SessionCatalog.scala

@@ -37,6 +37,18 @@ import org.apache.spark.sql.catalyst.util.StringUtils

 object SessionCatalog {
   val DEFAULT_DATABASE = "default"
+
+  /**
+   * This method is used to make the given path qualified before we
+   * store this path in the underlying external catalog. So, when a path
+   * does not contain a scheme, this path will not be changed after the default
+   * FileSystem is changed.
+   */
+  def makeQualifiedPath(path: String, conf: Configuration): Path = {
+    val hadoopPath = new Path(path)
+    val fs = hadoopPath.getFileSystem(conf)
+    fs.makeQualified(hadoopPath)
+  }
 }
 
 /**
@@ -125,18 +137,6 @@ class SessionCatalog(
     CacheBuilder.newBuilder().maximumSize(cacheSize).build[QualifiedTableName, LogicalPlan]()
   }
 
-  /**
-   * This method is used to make the given path qualified before we
-   * store this path in the underlying external catalog. So, when a path
-   * does not contain a scheme, this path will not be changed after the default
-   * FileSystem is changed.
-   */
-  private def makeQualifiedPath(path: String): Path = {
-    val hadoopPath = new Path(path)
-    val fs = hadoopPath.getFileSystem(hadoopConf)
-    fs.makeQualified(hadoopPath)
-  }
-
   private def requireDbExists(db: String): Unit = {
     if (!databaseExists(db)) {
       throw new NoSuchDatabaseException(db)
@@ -170,7 +170,7 @@ class SessionCatalog(
"you cannot create a database with this name.")
}
validateName(dbName)
val qualifiedPath = makeQualifiedPath(dbDefinition.locationUri).toString
val qualifiedPath = makeQualifiedPath(dbDefinition.locationUri, hadoopConf).toString
externalCatalog.createDatabase(
dbDefinition.copy(name = dbName, locationUri = qualifiedPath),
ignoreIfExists)
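To see what the extracted helper does, here is a minimal, self-contained sketch (assuming only a Hadoop client on the classpath; the demo object and example paths are illustrative, not part of the PR). A scheme-less path is qualified against the current default FileSystem exactly once, so the URI stored in the external catalog keeps its scheme even if fs.defaultFS later changes:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

object QualifiedPathDemo {
  // Same logic as the helper moved into object SessionCatalog above.
  def makeQualifiedPath(path: String, conf: Configuration): Path = {
    val hadoopPath = new Path(path)
    val fs = hadoopPath.getFileSystem(conf)
    fs.makeQualified(hadoopPath)
  }

  def main(args: Array[String]): Unit = {
    val conf = new Configuration() // fs.defaultFS defaults to file:///
    // A scheme-less warehouse path picks up the default FileSystem's scheme:
    println(makeQualifiedPath("/user/hive/warehouse", conf))
    // prints: file:/user/hive/warehouse
    // An already-qualified path is only normalized, never re-schemed:
    println(makeQualifiedPath("file:///user/hive/warehouse", conf))
    // prints: file:/user/hive/warehouse
  }
}
```

Storing the qualified form is what later lets HiveClientImpl compare metastore-recorded locations and the warehouse path consistently.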
BucketedWriteSuite.scala

@@ -92,8 +92,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils {

   def tableDir: File = {
     val identifier = spark.sessionState.sqlParser.parseTableIdentifier("bucketed_table")
-    new File(URI.create(s"file:${spark.sessionState.catalog.defaultTablePath(identifier)
-      .stripPrefix("file:")}"))
+    new File(URI.create(s"${spark.sessionState.catalog.defaultTablePath(identifier)}"))
   }
 
   /**
HiveClientImpl.scala

@@ -341,8 +341,10 @@ private[hive] class HiveClientImpl(
   override def getDatabase(dbName: String): CatalogDatabase = withHiveState {
     Option(client.getDatabase(dbName)).map { d =>
       // default database's location always use the warehouse path
+      // since the location of database stored in metastore is qualified,
+      // here we also make qualify for warehouse location
       val dbLocation = if (dbName == SessionCatalog.DEFAULT_DATABASE) {

    [Review thread on the line above]

    Contributor:
    Is it more logical to put this logic in SessionCatalog?

    Contributor Author:
    Sorry, I don't get the point; if this logic is exactly what we expected, shouldn't we replace it at the beginning?


-        sparkConf.get(WAREHOUSE_PATH)
+        SessionCatalog.makeQualifiedPath(sparkConf.get(WAREHOUSE_PATH), hadoopConf).toString

    [Review thread on the changed line]

    Contributor (@cloud-fan, Feb 23, 2017):
    This won't work for InMemoryCatalog, will it?

    You should either implement this logic in all ExternalCatalogs, or put it in SessionCatalog.

    Contributor Author:
    InMemoryCatalog doesn't share the metastore db; should we also make some change for it?

    Contributor:
    What I want is consistency. We have now decided to define the location of the default database as the warehouse path, and we should stick with that. The main goal of this PR is not to fix the bug when sharing a metastore db, but to change the definition of the default database location.

    Contributor Author:
    Agreed! 👍

       } else d.getLocationUri
 
       CatalogDatabase(
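To make the reviewer's alternative concrete, here is a hypothetical sketch, not code from the PR, of resolving the default database's location once in the session-level catalog rather than in each ExternalCatalog implementation; all names below (SessionCatalogSketch, DatabaseSketch, ExternalCatalogSketch) are stand-ins invented for illustration:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

// Hypothetical sketch of the suggestion: resolve the default database's
// location in one place (the session-level catalog) instead of in every
// ExternalCatalog implementation.
class SessionCatalogSketch(
    externalCatalog: ExternalCatalogSketch, // assumed interface, see below
    warehousePath: String,
    hadoopConf: Configuration) {

  private val defaultDatabase = "default"

  def getDatabaseMetadata(db: String): DatabaseSketch = {
    val metadata = externalCatalog.getDatabase(db)
    if (db == defaultDatabase) {
      // The default database's location is always the (qualified) warehouse
      // path, regardless of what any shared metastore recorded.
      val qualified = {
        val p = new Path(warehousePath)
        p.getFileSystem(hadoopConf).makeQualified(p).toString
      }
      metadata.copy(locationUri = qualified)
    } else {
      metadata
    }
  }
}

// Minimal stand-ins so the sketch compiles on its own.
case class DatabaseSketch(name: String, locationUri: String)
trait ExternalCatalogSketch { def getDatabase(db: String): DatabaseSketch }
```

The PR itself ultimately took a different route and moved the default-database path logic into ExternalCatalog (see the "mv defaultdb path logic to ExternalCatalog" commit in the list above), which serves the same consistency goal.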
ShowCreateTableSuite.scala

@@ -344,8 +344,6 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleton
     )
 
     table.copy(
-      storage = table.storage.copy(
-        locationUri = table.storage.locationUri.map(_.stripPrefix("file:"))),
       createTime = 0L,
       lastAccessTime = 0L,
       properties = table.properties.filterKeys(!nondeterministicProps.contains(_))
VersionsSuite.scala

@@ -655,7 +655,7 @@ class VersionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton

     val tPath = new Path(spark.sessionState.conf.warehousePath, "t")
     Seq("1").toDF("a").write.saveAsTable("t")
-    val expectedPath = tPath.toUri.getPath.stripSuffix("/")
+    val expectedPath = s"file:${tPath.toUri.getPath.stripSuffix("/")}"
     val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
 
     assert(table.location.stripSuffix("/") == expectedPath)
@@ -665,7 +665,7 @@ class VersionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton
     val t1Path = new Path(spark.sessionState.conf.warehousePath, "t1")
     spark.sql("create table t1 using parquet as select 2 as a")
     val table1 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1"))
-    val expectedPath1 = t1Path.toUri.getPath.stripSuffix("/")
+    val expectedPath1 = s"file:${t1Path.toUri.getPath.stripSuffix("/")}"
 
     assert(table1.location.stripSuffix("/") == expectedPath1)
     assert(t1Path.getFileSystem(spark.sessionState.newHadoopConf()).exists(t1Path))
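The test updates above follow from the new qualification step: Path.toUri.getPath drops the scheme, while a catalog location qualified against the local FileSystem keeps it. A tiny sketch (the /tmp path is illustrative, standing in for warehousePath) showing the difference:

```scala
import org.apache.hadoop.fs.Path

object SchemeRoundTrip {
  def main(args: Array[String]): Unit = {
    val tPath = new Path("/tmp/warehouse", "t") // stand-in for warehousePath + "t"
    // toUri.getPath yields the raw path with no scheme:
    println(tPath.toUri.getPath)            // prints: /tmp/warehouse/t
    // which is why the expected value must now prepend the scheme by hand:
    println(s"file:${tPath.toUri.getPath}") // prints: file:/tmp/warehouse/t
  }
}
```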