-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-18817] [SPARKR] [SQL] Set default warehouse dir to tempdir #16290
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
ac05089
2583410
1d0d1d2
014d7e1
6eec97d
f7b4772
047054e
b14c302
7a98b91
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
To do this we introduce a new SQL config that is set to tempdir from SparkR.
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2165,6 +2165,14 @@ test_that("SQL error message is returned from JVM", { | |
| expect_equal(grepl("blah", retError), TRUE) | ||
| }) | ||
|
|
||
| test_that("Default warehouse dir should be set to tempdir", { | ||
| # nothing should be written outside tempdir() without explicit user permission | ||
| inital_working_directory_files <- list.files() | ||
|
||
| result <- sql("CREATE TABLE warehouse") | ||
| expect_equal(inital_working_directory_files, list.files()) | ||
| result <- sql("DELETE TABLE warehouse") | ||
| }) | ||
|
|
||
| irisDF <- suppressWarnings(createDataFrame(iris)) | ||
|
|
||
| test_that("Method as.data.frame as a synonym for collect()", { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -819,7 +819,7 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging { | |
|
|
||
| def variableSubstituteDepth: Int = getConf(VARIABLE_SUBSTITUTE_DEPTH) | ||
|
|
||
| def warehousePath: String = new Path(getConf(StaticSQLConf.WAREHOUSE_PATH)).toString | ||
| def warehousePath: String = new Path(getConf(StaticSQLConf.DEFAULT_WAREHOUSE_PATH)).toString | ||
|
|
||
| def ignoreCorruptFiles: Boolean = getConf(IGNORE_CORRUPT_FILES) | ||
|
|
||
|
|
@@ -964,11 +964,17 @@ object StaticSQLConf { | |
| } | ||
| } | ||
|
|
||
| val WAREHOUSE_PATH = buildConf("spark.sql.warehouse.dir") | ||
| .doc("The default location for managed databases and tables.") | ||
| val DEFAULT_WAREHOUSE_PATH = buildConf("spark.sql.default.warehouse.dir") | ||
|
||
| .doc("The default location for managed databases and tables. " + | ||
| "Used if spark.sql.warehouse.dir is not set") | ||
| .stringConf | ||
| .createWithDefault(Utils.resolveURI("spark-warehouse").toString) | ||
|
|
||
| val WAREHOUSE_PATH = buildConf("spark.sql.warehouse.dir") | ||
| .doc("The location for managed databases and tables.") | ||
|
||
| .stringConf | ||
| .createOptional | ||
|
|
||
| val CATALOG_IMPLEMENTATION = buildConf("spark.sql.catalogImplementation") | ||
| .internal() | ||
| .stringConf | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,14 +55,19 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging { | |
| s"is set. Setting ${WAREHOUSE_PATH.key} to the value of " + | ||
| s"hive.metastore.warehouse.dir ('$hiveWarehouseDir').") | ||
| hiveWarehouseDir | ||
| } else { | ||
| } else if (sparkContext.conf.contains(WAREHOUSE_PATH.key) && | ||
| sparkContext.conf.get(WAREHOUSE_PATH).isDefined) { | ||
|
||
| // If spark.sql.warehouse.dir is set, we will override hive.metastore.warehouse.dir using | ||
| // the value of spark.sql.warehouse.dir. | ||
| // When neither spark.sql.warehouse.dir nor hive.metastore.warehouse.dir is set, | ||
| // we will set hive.metastore.warehouse.dir to the default value of spark.sql.warehouse.dir. | ||
| val sparkWarehouseDir = sparkContext.conf.get(WAREHOUSE_PATH) | ||
| val sparkWarehouseDir = sparkContext.conf.get(WAREHOUSE_PATH).get | ||
| sparkContext.conf.set("hive.metastore.warehouse.dir", sparkWarehouseDir) | ||
| sparkWarehouseDir | ||
| } else { | ||
| // When neither spark.sql.warehouse.dir nor hive.metastore.warehouse.dir is set, | ||
| // we will set hive.metastore.warehouse.dir to the value of spark.sql.default.warehouse.dir. | ||
| val sparkDefaultWarehouseDir = sparkContext.conf.get(DEFAULT_WAREHOUSE_PATH) | ||
| sparkContext.conf.set("hive.metastore.warehouse.dir", sparkDefaultWarehouseDir) | ||
| sparkDefaultWarehouseDir | ||
| } | ||
|
|
||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -221,6 +221,19 @@ class SQLConfSuite extends QueryTest with SharedSQLContext { | |
| .sessionState.conf.warehousePath.stripSuffix("/")) | ||
| } | ||
|
|
||
| test("changing default value of warehouse path") { | ||
|
||
| try { | ||
| val newWarehouseDefault = "spark-warehouse2" | ||
| val newWarehouseDefaultPath = new Path(Utils.resolveURI(newWarehouseDefault)).toString | ||
| sparkContext.conf.set("spark.sql.default.warehouse.dir", newWarehouseDefaultPath) | ||
| val spark = new SparkSession(sparkContext) | ||
| assert(newWarehouseDefaultPath.stripSuffix("/") === spark | ||
| .sessionState.conf.warehousePath.stripSuffix("/")) | ||
|
||
| } finally { | ||
| sparkContext.conf.remove("spark.sql.default.warehouse.dir") | ||
| } | ||
| } | ||
|
|
||
| test("MAX_CASES_BRANCHES") { | ||
| withTable("tab1") { | ||
| spark.range(10).write.saveAsTable("tab1") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we could just
sparkConfigMap[["spark.sql.warehouse.default.dir"]] <- tempdir()Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should move this after L383 "overrideEnvs(sparkConfigMap, paramMap)" in case
spark.sql.warehouse.default.diris passed in named param and explicitly set to something other then the tmp dirThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved this below L383 - We still need the
existscheck to make sure we don't overwrite any user provided value ?