[SPARK-26794][SQL] SparkSession enableHiveSupport does not point to hive but in-memory while the SparkContext exists #23709
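A minimal reproduction of the reported bug, assembled from the title and the tests below (a sketch; before this fix the final assertion fails because the session silently falls back to the in-memory catalog):

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

// A SparkContext already exists before the first SparkSession is built...
val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("repro"))

// ...so enableHiveSupport() should still select the Hive external catalog,
val spark = SparkSession.builder().enableHiveSupport().getOrCreate()

// but before this fix the pre-existing context's conf won and the shared
// catalog came out as the in-memory implementation.
assert(spark.sharedState.externalCatalog.unwrapped.getClass.getName
  .contains("HiveExternalCatalog"))
```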
Changes from 1 commit
HiveSharedStateSuite.scala:

```diff
@@ -17,28 +17,54 @@
 package org.apache.spark.sql.hive

 import org.apache.hadoop.hive.conf.HiveConf.ConfVars

 import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.internal.SharedState
 import org.apache.spark.sql.internal.StaticSQLConf._
 import org.apache.spark.util.Utils

 class HiveSharedStateSuite extends SparkFunSuite {

   test("the catalog should be determined at the very first") {
-    SparkContext.getActive.foreach(_.stop())
-    var sc: SparkContext = null
-    try {
-      val conf = new SparkConf().setMaster("local").setAppName("SharedState Test")
-      sc = new SparkContext(conf)
-      val ss = SparkSession.builder().enableHiveSupport().getOrCreate()
-      assert(ss.sharedState.externalCatalog.unwrapped.getClass.getName
-        .contains("HiveExternalCatalog"), "The catalog should be hive ")
-
-      val ss2 = SparkSession.builder().getOrCreate()
-      assert(ss2.sharedState.externalCatalog.unwrapped.getClass.getName
-        .contains("HiveExternalCatalog"), "The catalog should be shared across sessions")
-    } finally {
-      if (sc != null && !sc.isStopped) {
-        sc.stop()
-      }
-    }
+    val conf = new SparkConf().setMaster("local").setAppName("SharedState Test")
+    val sc = SparkContext.getOrCreate(conf)
+    val ss = SparkSession.builder().enableHiveSupport().getOrCreate()
+    assert(ss.sharedState.externalCatalog.unwrapped.getClass.getName
+      .contains("HiveExternalCatalog"), "The catalog should be hive ")
+
+    val ss2 = SparkSession.builder().getOrCreate()
+    assert(ss2.sharedState.externalCatalog.unwrapped.getClass.getName
+      .contains("HiveExternalCatalog"), "The catalog should be shared across sessions")
   }

+  test("using initial configs to generate SharedState") {
+    val conf = new SparkConf().setMaster("local").setAppName("SharedState Test")
+    val sc = SparkContext.getOrCreate(conf)
+    val invalidPath = "invalid/path"
+    val metastorePath = Utils.createTempDir()
+    val tmpDb = "tmp_db"
+    val initialConfigs = Map("spark.foo" -> "bar",
+      WAREHOUSE_PATH.key -> invalidPath,
+      ConfVars.METASTOREWAREHOUSE.varname -> invalidPath,
+      CATALOG_IMPLEMENTATION.key -> "hive",
+      ConfVars.METASTORECONNECTURLKEY.varname ->
+        s"jdbc:derby:;databaseName=$metastorePath/metastore_db;create=true",
+      GLOBAL_TEMP_DATABASE.key -> tmpDb)
+    val state = new SharedState(sc, initialConfigs)
```
Contributor: Can we be more real-world and create a SparkSession here?

Member (Author): In the hive module tests there is a long-lived, cached TestHiveSession, so I construct the SharedState explicitly here.

Contributor: Then why can we build a SparkSession in the above test case?

Member (Author): Run individually, the above test passes, but when it runs as part of the whole hive module it picks up an existing SparkSession with Hive support. So I added the second test to ensure that all configurations are passed through correctly.

Contributor: shall we add

Member (Author): Without this fix, test 1 fails when run individually but passes in the full module run.

Contributor: I'd like to move test 1 to the sql core module; there we can catch the IllegalArgumentException.

Member (Author): An IllegalArgumentException does not tell us whether the catalog is Hive or not. That test would pass with or without this fix, because it never even reaches the point where the catalog is initialized. (See the sketch after this thread.)

Contributor: Then let's just remove test 1, but post the code in the PR description to let people know how to reproduce the bug.

Member (Author): OK, both done.
```diff
+
+    assert(state.warehousePath !== invalidPath, "warehouse path can't determine by session options")
+    assert(sc.conf.get(WAREHOUSE_PATH.key) !== invalidPath,
+      "warehouse conf in session options can't affect application wide spark conf")
+    assert(sc.hadoopConfiguration.get(ConfVars.METASTOREWAREHOUSE.varname) !== invalidPath,
+      "warehouse conf in session options can't affect application wide hadoop conf")
+
+    assert(!state.sparkContext.conf.contains("spark.foo"),
+      "static spark conf should not be affected by session")
+    assert(state.sparkContext.conf.get(CATALOG_IMPLEMENTATION) === "in-memory")
+    assert(state.globalTempViewManager.database === tmpDb)
+    assert(state.externalCatalog.unwrapped.isInstanceOf[HiveExternalCatalog],
+      "Initial SparkSession options can determine the catalog")
+  }
 }
```
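For context on what the new `SharedState(sc, initialConfigs)` constructor achieves, here is a rough sketch of the user-facing flow (an illustration under the assumption of a fresh application; the keys are the static SQL confs exercised by the test above, and the "hive" setting additionally requires the Hive classes on the classpath):

```scala
import org.apache.spark.sql.SparkSession

// Options handed to the very first builder become the initial configs of the
// shared state. "spark.sql.globalTempDatabase" is a static SQL conf, so only
// the first session in the application can set it.
val spark = SparkSession.builder()
  .master("local")
  .appName("SharedState demo")
  .config("spark.sql.catalogImplementation", "hive") // the switch enableHiveSupport() flips
  .config("spark.sql.globalTempDatabase", "tmp_db")
  .getOrCreate()

// Static confs fixed by the first session are readable afterwards...
assert(spark.conf.get("spark.sql.globalTempDatabase") == "tmp_db")
// ...and later sessions share the same state instead of re-deciding the catalog.
val spark2 = SparkSession.builder().getOrCreate()
assert(spark2.conf.get("spark.sql.catalogImplementation") == "hive")
```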
nit: use `isInstanceOf[HiveExternalCatalog]`