Changes from 1 commit (of 31 commits)
aebdfc6
[SPARK-19667][SQL]create table with hiveenabled in default database u…
windpiger Feb 20, 2017
825c0ad
rename a conf name
windpiger Feb 20, 2017
a2c9168
fix test faile
windpiger Feb 21, 2017
bacd528
process default database location when create/get database from metas…
windpiger Feb 22, 2017
3f6e061
remove an redundant line
windpiger Feb 22, 2017
96dcc7d
fix empty string location of database
windpiger Feb 22, 2017
f329387
modify the test case
windpiger Feb 22, 2017
83dba73
Merge branch 'master' into defaultDBPathInHive
windpiger Feb 22, 2017
58a0020
fix test failed
windpiger Feb 22, 2017
1dce2d7
add log to find out why jenkins failed
windpiger Feb 22, 2017
12f81d3
add scalastyle:off for println
windpiger Feb 22, 2017
56e83d5
fix test faile
windpiger Feb 22, 2017
901bb1c
make warehouse path qualified for default database
windpiger Feb 23, 2017
99d9746
remove a string s
windpiger Feb 23, 2017
db555e3
modify a comment
windpiger Feb 23, 2017
d327994
fix test failed
windpiger Feb 23, 2017
73c8802
move to sessioncatalog
windpiger Feb 23, 2017
747b31a
remove import
windpiger Feb 23, 2017
8f8063f
remove an import
windpiger Feb 23, 2017
4dc11c1
modify some codestyle and some comment
windpiger Feb 24, 2017
9c0773b
Merge branch 'defaultDBPathInHive' of github.com:windpiger/spark into…
windpiger Feb 24, 2017
80b8133
mv defaultdb path logic to ExternalCatalog
windpiger Feb 27, 2017
41ea115
modify a comment
windpiger Feb 27, 2017
13245e4
modify a comment
windpiger Feb 27, 2017
096ae63
add final def
windpiger Mar 1, 2017
badd61b
modify some code
windpiger Mar 2, 2017
35d2b59
add lazy flag
windpiger Mar 2, 2017
e3a467e
modify test case
windpiger Mar 3, 2017
ae9938a
modify test case
windpiger Mar 3, 2017
7739ccd
mv getdatabase
windpiger Mar 3, 2017
f93f5d3
merge with master
windpiger Mar 8, 2017
Commit 901bb1c37ae510008b51d260750b96b88def93f8
make warehouse path qualified for default database
windpiger committed Feb 23, 2017
SessionCatalog.scala

@@ -37,6 +37,18 @@ import org.apache.spark.sql.catalyst.util.StringUtils

 object SessionCatalog {
   val DEFAULT_DATABASE = "default"
+
+  /**
+   * This method is used to make the given path qualified before we
+   * store this path in the underlying external catalog. So, when a path
+   * does not contain a scheme, this path will not be changed after the default
+   * FileSystem is changed.
+   */
+  def makeQualifiedPath(path: String, conf: Configuration): Path = {
+    val hadoopPath = new Path(path)
+    val fs = hadoopPath.getFileSystem(conf)
+    fs.makeQualified(hadoopPath)
+  }
 }
 
 /**
@@ -125,18 +137,6 @@ class SessionCatalog(
     CacheBuilder.newBuilder().maximumSize(cacheSize).build[QualifiedTableName, LogicalPlan]()
   }
 
-  /**
-   * This method is used to make the given path qualified before we
-   * store this path in the underlying external catalog. So, when a path
-   * does not contain a scheme, this path will not be changed after the default
-   * FileSystem is changed.
-   */
-  private def makeQualifiedPath(path: String): Path = {
-    val hadoopPath = new Path(path)
-    val fs = hadoopPath.getFileSystem(hadoopConf)
-    fs.makeQualified(hadoopPath)
-  }
-
   private def requireDbExists(db: String): Unit = {
     if (!databaseExists(db)) {
       throw new NoSuchDatabaseException(db)
@@ -170,7 +170,7 @@ class SessionCatalog(
"you cannot create a database with this name.")
}
validateName(dbName)
val qualifiedPath = makeQualifiedPath(dbDefinition.locationUri).toString
val qualifiedPath = makeQualifiedPath(dbDefinition.locationUri, hadoopConf).toString
externalCatalog.createDatabase(
dbDefinition.copy(name = dbName, locationUri = qualifiedPath),
ignoreIfExists)
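To see what the extracted helper does, here is a minimal, self-contained sketch (assuming only a Hadoop client on the classpath; the demo object and example paths are illustrative, not part of the PR). A scheme-less path is qualified against the current default FileSystem exactly once, so the URI stored in the external catalog keeps its scheme even if fs.defaultFS later changes:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

object QualifiedPathDemo {
  // Same logic as the helper moved into object SessionCatalog above.
  def makeQualifiedPath(path: String, conf: Configuration): Path = {
    val hadoopPath = new Path(path)
    val fs = hadoopPath.getFileSystem(conf)
    fs.makeQualified(hadoopPath)
  }

  def main(args: Array[String]): Unit = {
    val conf = new Configuration() // fs.defaultFS defaults to file:///
    // A scheme-less warehouse path picks up the default FileSystem's scheme:
    println(makeQualifiedPath("/user/hive/warehouse", conf))
    // prints: file:/user/hive/warehouse
    // An already-qualified path is only normalized, never re-schemed:
    println(makeQualifiedPath("file:///user/hive/warehouse", conf))
    // prints: file:/user/hive/warehouse
  }
}
```

Storing the qualified form is what later lets HiveClientImpl compare metastore-recorded locations and the warehouse path consistently.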
BucketedWriteSuite.scala

@@ -92,8 +92,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils {

   def tableDir: File = {
     val identifier = spark.sessionState.sqlParser.parseTableIdentifier("bucketed_table")
-    new File(URI.create(s"file:${spark.sessionState.catalog.defaultTablePath(identifier)
-      .stripPrefix("file:")}"))
+    new File(URI.create(s"${spark.sessionState.catalog.defaultTablePath(identifier)}"))
   }
 
   /**
HiveClientImpl.scala

@@ -341,8 +341,10 @@ private[hive] class HiveClientImpl(
   override def getDatabase(dbName: String): CatalogDatabase = withHiveState {
     Option(client.getDatabase(dbName)).map { d =>
       // default database's location always use the warehouse path
+      // since the location of database stored in metastore is qualified,
+      // here we also make qualify for warehouse location
       val dbLocation = if (dbName == SessionCatalog.DEFAULT_DATABASE) {

    [Review thread on the line above]

    Contributor:
    Is it more logical to put this logic in SessionCatalog?

    Contributor Author:
    Sorry, I don't get the point; if this logic is exactly what we expected, shouldn't we replace it at the beginning?


-        sparkConf.get(WAREHOUSE_PATH)
+        SessionCatalog.makeQualifiedPath(sparkConf.get(WAREHOUSE_PATH), hadoopConf).toString

    [Review thread on the changed line]

    Contributor (@cloud-fan, Feb 23, 2017):
    This won't work for InMemoryCatalog, will it?

    You should either implement this logic in all ExternalCatalogs, or put it in SessionCatalog.

    Contributor Author:
    InMemoryCatalog doesn't share the metastore db; should we also make some change for it?

    Contributor:
    What I want is consistency. We have now decided to define the location of the default database as the warehouse path, and we should stick with that. The main goal of this PR is not to fix the bug when sharing a metastore db, but to change the definition of the default database location.

    Contributor Author:
    Agreed! 👍

       } else d.getLocationUri
 
       CatalogDatabase(
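To make the reviewer's alternative concrete, here is a hypothetical sketch, not code from the PR, of resolving the default database's location once in the session-level catalog rather than in each ExternalCatalog implementation; all names below (SessionCatalogSketch, DatabaseSketch, ExternalCatalogSketch) are stand-ins invented for illustration:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

// Hypothetical sketch of the suggestion: resolve the default database's
// location in one place (the session-level catalog) instead of in every
// ExternalCatalog implementation.
class SessionCatalogSketch(
    externalCatalog: ExternalCatalogSketch, // assumed interface, see below
    warehousePath: String,
    hadoopConf: Configuration) {

  private val defaultDatabase = "default"

  def getDatabaseMetadata(db: String): DatabaseSketch = {
    val metadata = externalCatalog.getDatabase(db)
    if (db == defaultDatabase) {
      // The default database's location is always the (qualified) warehouse
      // path, regardless of what any shared metastore recorded.
      val qualified = {
        val p = new Path(warehousePath)
        p.getFileSystem(hadoopConf).makeQualified(p).toString
      }
      metadata.copy(locationUri = qualified)
    } else {
      metadata
    }
  }
}

// Minimal stand-ins so the sketch compiles on its own.
case class DatabaseSketch(name: String, locationUri: String)
trait ExternalCatalogSketch { def getDatabase(db: String): DatabaseSketch }
```

The PR itself ultimately took a different route and moved the default-database path logic into ExternalCatalog (see the "mv defaultdb path logic to ExternalCatalog" commit in the list above), which serves the same consistency goal.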
ShowCreateTableSuite.scala

@@ -344,8 +344,6 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleton
     )
 
     table.copy(
-      storage = table.storage.copy(
-        locationUri = table.storage.locationUri.map(_.stripPrefix("file:"))),
       createTime = 0L,
       lastAccessTime = 0L,
       properties = table.properties.filterKeys(!nondeterministicProps.contains(_))
VersionsSuite.scala

@@ -655,7 +655,7 @@ class VersionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton

     val tPath = new Path(spark.sessionState.conf.warehousePath, "t")
     Seq("1").toDF("a").write.saveAsTable("t")
-    val expectedPath = tPath.toUri.getPath.stripSuffix("/")
+    val expectedPath = s"file:${tPath.toUri.getPath.stripSuffix("/")}"
     val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
 
     assert(table.location.stripSuffix("/") == expectedPath)
@@ -665,7 +665,7 @@ class VersionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton
     val t1Path = new Path(spark.sessionState.conf.warehousePath, "t1")
     spark.sql("create table t1 using parquet as select 2 as a")
     val table1 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1"))
-    val expectedPath1 = t1Path.toUri.getPath.stripSuffix("/")
+    val expectedPath1 = s"file:${t1Path.toUri.getPath.stripSuffix("/")}"
 
     assert(table1.location.stripSuffix("/") == expectedPath1)
     assert(t1Path.getFileSystem(spark.sessionState.newHadoopConf()).exists(t1Path))
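The test updates above follow from the new qualification step: Path.toUri.getPath drops the scheme, while a catalog location qualified against the local FileSystem keeps it. A tiny sketch (the /tmp path is illustrative, standing in for warehousePath) showing the difference:

```scala
import org.apache.hadoop.fs.Path

object SchemeRoundTrip {
  def main(args: Array[String]): Unit = {
    val tPath = new Path("/tmp/warehouse", "t") // stand-in for warehousePath + "t"
    // toUri.getPath yields the raw path with no scheme:
    println(tPath.toUri.getPath)            // prints: /tmp/warehouse/t
    // which is why the expected value must now prepend the scheme by hand:
    println(s"file:${tPath.toUri.getPath}") // prints: file:/tmp/warehouse/t
  }
}
```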