diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 7384b24c50b16..91e6385dec81b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -182,9 +182,28 @@ class SQLContext(@transient val sparkContext: SparkContext)
     conf.dialect
   }
 
-  sparkContext.getConf.getAll.foreach {
-    case (key, value) if key.startsWith("spark.sql") => setConf(key, value)
-    case _ =>
+  {
+    // We extract Spark SQL settings from the SparkContext's conf and put them into
+    // Spark SQL's conf.
+    // First, we populate SQLConf (conf), so that other values that use these settings
+    // in their construction can pick up the correct settings.
+    // For example, metadataHive in HiveContext may need both spark.sql.hive.metastore.version
+    // and spark.sql.hive.metastore.jars to be constructed correctly.
+    val properties = new Properties
+    sparkContext.getConf.getAll.foreach {
+      case (key, value) if key.startsWith("spark.sql") => properties.setProperty(key, value)
+      case _ =>
+    }
+    // We put these settings directly into conf to avoid calling setConf, which may have
+    // side effects. For example, in HiveContext, setConf may cause executionHive and
+    // metadataHive to be constructed. If we called setConf here, the constructed metadataHive
+    // might have the wrong settings, or its construction might fail.
+    conf.setConf(properties)
+    // After we have populated SQLConf, we call setConf to populate other confs in the
+    // subclass (e.g. hiveconf in HiveContext).
+    properties.foreach {
+      case (key, value) => setConf(key, value)
+    }
   }
 
   @transient
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index 7eb4842726665..deceb67d2b966 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -17,7 +17,8 @@
 
 package org.apache.spark.sql.hive.client
 
-import org.apache.spark.{Logging, SparkFunSuite}
+import org.apache.spark.sql.hive.HiveContext
+import org.apache.spark.{Logging, SparkConf, SparkContext, SparkFunSuite}
 import org.apache.spark.sql.catalyst.util.quietly
 import org.apache.spark.util.Utils
 
@@ -37,6 +38,48 @@ class VersionsSuite extends SparkFunSuite with Logging {
       "hive.metastore.warehouse.dir" -> warehousePath.toString)
   }
 
+  test("SPARK-8020: successfully create a HiveContext with metastore settings in Spark conf.") {
+    val sparkConf =
+      new SparkConf() {
+        // We are not really cloning it; we need to keep the custom getAll.
+        override def clone: SparkConf = this
+
+        override def getAll: Array[(String, String)] = {
+          val allSettings = super.getAll
+          val metastoreVersion = get("spark.sql.hive.metastore.version")
+          val metastoreJars = get("spark.sql.hive.metastore.jars")
+
+          val others = allSettings.filterNot { case (key, _) =>
+            key == "spark.sql.hive.metastore.version" || key == "spark.sql.hive.metastore.jars"
+          }
+
+          // Put metastore.version first. This ordering is needed to trigger the exception
+          // caused by SPARK-8020. Other problems triggered by SPARK-8020
+          // (e.g. using Hive 0.13.1's metastore client to connect to a 0.12 metastore)
+          // are not easy to test.
+          Array(
+            ("spark.sql.hive.metastore.version" -> metastoreVersion),
+            ("spark.sql.hive.metastore.jars" -> metastoreJars)) ++ others
+        }
+      }
+    sparkConf
+      .set("spark.sql.hive.metastore.version", "12")
+      .set("spark.sql.hive.metastore.jars", "maven")
+
+    val hiveContext = new HiveContext(
+      new SparkContext(
+        "local[2]",
+        "TestSQLContextInVersionsSuite",
+        sparkConf)) {
+
+      protected override def configure(): Map[String, String] = buildConf
+
+    }
+
+    // Make sure all metastore-related lazy vals got created.
+    hiveContext.tables()
+  }
+
   test("success sanity check") {
     val badClient = IsolatedClientLoader.forVersion("13", buildConf()).client
     val db = new HiveDatabase("default", "")
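
Note: the sketch below is not part of the patch. It is a minimal driver program, under assumed settings (local master, placeholder app name, Hive 0.12 metastore jars resolved from Maven), showing the user-facing scenario this change addresses: metastore options supplied through SparkConf must reach SQLConf before HiveContext constructs metadataHive.

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.hive.HiveContext

    // Hypothetical example; the master, app name, and metastore values are illustrative.
    object MetastoreConfExample {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
          .setMaster("local[2]")
          .setAppName("metastore-conf-example")
          // Ask HiveContext to talk to a Hive 0.12 metastore using jars pulled from Maven.
          .set("spark.sql.hive.metastore.version", "0.12.0")
          .set("spark.sql.hive.metastore.jars", "maven")

        val sc = new SparkContext(conf)
        // With this patch, SQLContext first copies the spark.sql.* entries into SQLConf and
        // only then calls setConf, so metadataHive is built with the intended version/jars.
        val hiveContext = new HiveContext(sc)
        hiveContext.sql("SHOW TABLES").collect().foreach(println)
        sc.stop()
      }
    }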