Closed
Changes from 1 commit
Commits (58)
16ca7ac  It compiles!! (Aug 4, 2015)
5e6a20c  Refactor SQLTestUtils to reduce duplication (Aug 5, 2015)
d1d1449  Remove HiveTest singleton (Aug 5, 2015)
d4aafb1  Avoid the need to switch to HiveContexts (Aug 5, 2015)
6345cee  Clean up JsonSuite (Aug 5, 2015)
68ac6fe  Rename the test traits properly (Aug 5, 2015)
b15fdc6  Stop SparkContext in Java SQL tests (Aug 5, 2015)
0d74a72  Load test data early in case tables are accessed by name (Aug 5, 2015)
eee415d  Refactor implicits into SQLTestUtils (Aug 10, 2015)
55d0b1b  Fix Java not serializable exception in tests (Aug 10, 2015)
4f59bee  Fix DataSourceTest et al. (Aug 10, 2015)
88d4f16  Fix hive tests to use the same pattern (Aug 11, 2015)
5fe4bfb  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 11, 2015)
c4a22bc  Fix compile after resolving merge conflicts (Aug 11, 2015)
d9a8390  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 11, 2015)
f5619f8  Fix test compile after resolving merge conflicts (Aug 11, 2015)
9395cfa  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 11, 2015)
c51b3d8  Fix OuterJoinSuite (Aug 11, 2015)
997715e  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 11, 2015)
1400770  Fix style (Aug 11, 2015)
c92a3b0  Fix MiMa (Aug 11, 2015)
4debedf  Clean up inheritance in test util traits (Aug 11, 2015)
ca87dda  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 11, 2015)
1cf53ad  Create new context in SBT console by default (Aug 11, 2015)
bc5c999  Fix SemiJoinSuite (Aug 12, 2015)
19fd6c3  Fix InnerJoinSuite (Aug 12, 2015)
e1e513e  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
1e4c321  Address comments (Aug 12, 2015)
54848e2  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
94f9c77  Revert the removal of some BeforeAndAfters (Aug 12, 2015)
bec7d28  Fix a hive test + minor format updates (Aug 12, 2015)
24c086d  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
561eacc  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
aaba277  Fix places where we override before / after alls (Aug 12, 2015)
aac8f7f  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
40959bb  Fix test (Aug 12, 2015)
55d6624  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
4ed58c8  Revert a few merge-conflict-induced unintentional changes (Aug 12, 2015)
0ce5638  Minor updates (Aug 13, 2015)
814df2f  Add back singletons but deprecate them (Aug 13, 2015)
099c74f  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
451fa37  Use consistent name for added back singletons (Aug 13, 2015)
9ea7f7c  Fix style (Aug 13, 2015)
ddc4b05  Revert "Use consistent name for added back singletons" (Aug 13, 2015)
ece3a81  Remove TestSQLContext, but keep TestHive (Aug 13, 2015)
94cc3b5  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
8d69bf8  Fix test (Aug 13, 2015)
7a0b1ef  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
b58ae73  Fix tests (Aug 13, 2015)
828144f  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
c4d44c9  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
48af8e4  Fix another before / after alls (Aug 13, 2015)
0606c82  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
f599bbc  Revert all Hive related changes (Aug 13, 2015)
aed7fc7  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
d85a6d8  Add a shorthand for sqlContext.sql / ctx.sql (Aug 13, 2015)
0b60325  Fix hive test compile (Aug 13, 2015)
821ea67  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
Remove HiveTest singleton
This allows us to use custom SparkContexts in hive tests.
Andrew Or committed Aug 5, 2015
commit d1d1449f3c01c8bb08cc56e9169180313f187bf3
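In short, the commit replaces hard references to the process-wide TestHive singleton with a suite-local context handle (`ctx` in the hunks below), so each suite can bring its own SparkContext. Below is an illustrative sketch of that pattern only; the trait and member names are mine, not the ones in the patch:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

// Illustrative sketch of the pattern this commit moves toward:
// each suite builds (and owns) its own context instead of sharing
// a global singleton that pins one SparkContext per JVM.
trait SuiteLocalHiveContext {
  // Suites can override this to supply a custom SparkContext config.
  protected def sparkConf: SparkConf =
    new SparkConf().setMaster("local[2]").setAppName(getClass.getSimpleName)

  protected lazy val ctx: HiveContext = new HiveContext(new SparkContext(sparkConf))
}

object Example extends SuiteLocalHiveContext {
  def main(args: Array[String]): Unit = {
    ctx.sql("SELECT 1").collect()   // was: TestHive.sql("SELECT 1").collect()
    ctx.sparkContext.stop()         // no global state left behind
  }
}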
project/SparkBuild.scala (5 changes: 2 additions & 3 deletions)

@@ -333,11 +333,12 @@ object SQL {
 
 object Hive {
 
+  // TODO: check me, will this work?
   lazy val settings = Seq(
     javaOptions += "-XX:MaxPermSize=256m",
     // Specially disable assertions since some Hive tests fail them
     javaOptions in Test := (javaOptions in Test).value.filterNot(_ == "-ea"),
-    // Multiple queries rely on the TestHive singleton. See comments there for more details.
+    // TODO: re-enable this now that we've gotten rid of the TestHive singleton?
     parallelExecution in Test := false,
     // Supporting all SerDes requires us to depend on deprecated APIs, so we turn off the warnings
     // only for this subproject.
@@ -356,8 +357,6 @@ object Hive {
         |import org.apache.spark.sql.execution
         |import org.apache.spark.sql.functions._
         |import org.apache.spark.sql.hive._
-        |import org.apache.spark.sql.hive.test.TestHive._
-        |import org.apache.spark.sql.hive.test.TestHive.implicits._
         |import org.apache.spark.sql.types._""".stripMargin,
     cleanupCommands in console := "sparkContext.stop()",
     // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce
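Dropping the TestHive._ console imports means the sbt hive/console session no longer gets a ready-made context; a later commit in this PR ("Create new context in SBT console by default", 1cf53ad) builds one instead. A hedged sketch of what such console bootstrap code could look like; the actual initialCommands in the patch are not shown here:

// Hypothetical console bootstrap: construct a fresh context rather
// than importing the removed TestHive singleton.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

val sparkContext = new SparkContext(
  new SparkConf().setMaster("local[*]").setAppName("hive-console"))
val sqlContext = new HiveContext(sparkContext)
import sqlContext.implicits._
import sqlContext.sql

Note that the hunk's cleanupCommands in console := "sparkContext.stop()" already assumes a sparkContext value along these lines.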
sql/README.md (1 change: 0 additions & 1 deletion)

@@ -60,7 +60,6 @@ import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.execution
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.hive._
-import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.types._
 Type in expressions to have them evaluated.
 Type :help for more information.
HashJoinCompatibilitySuite.scala

@@ -20,19 +20,18 @@ package org.apache.spark.sql.hive.execution
 import java.io.File
 
 import org.apache.spark.sql.SQLConf
-import org.apache.spark.sql.hive.test.TestHive
 
 /**
  * Runs the test cases that are included in the hive distribution with hash joins.
  */
 class HashJoinCompatibilitySuite extends HiveCompatibilitySuite {
   override def beforeAll() {
     super.beforeAll()
-    TestHive.setConf(SQLConf.SORTMERGE_JOIN, false)
+    ctx.setConf(SQLConf.SORTMERGE_JOIN, false)
   }
 
   override def afterAll() {
-    TestHive.setConf(SQLConf.SORTMERGE_JOIN, true)
+    ctx.setConf(SQLConf.SORTMERGE_JOIN, true)
     super.afterAll()
   }
 
HiveCompatibilitySuite.scala

@@ -23,41 +23,41 @@ import java.util.{Locale, TimeZone}
 import org.scalatest.BeforeAndAfter
 
 import org.apache.spark.sql.SQLConf
-import org.apache.spark.sql.hive.test.TestHive
 
 /**
  * Runs the test cases that are included in the hive distribution.
  */
 class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
 
   // TODO: bundle in jar files... get from classpath
-  private lazy val hiveQueryDir = TestHive.getHiveFile(
+  private lazy val hiveQueryDir = ctx.getHiveFile(
     "ql/src/test/queries/clientpositive".split("/").mkString(File.separator))
 
   private val originalTimeZone = TimeZone.getDefault
   private val originalLocale = Locale.getDefault
-  private val originalColumnBatchSize = TestHive.conf.columnBatchSize
-  private val originalInMemoryPartitionPruning = TestHive.conf.inMemoryPartitionPruning
+  private val originalColumnBatchSize = ctx.conf.columnBatchSize
+  private val originalInMemoryPartitionPruning = ctx.conf.inMemoryPartitionPruning
 
   def testCases = hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f)
 
   override def beforeAll() {
-    TestHive.cacheTables = true
+    ctx.cacheTables = true
     // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*)
    TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
     // Add Locale setting
     Locale.setDefault(Locale.US)
     // Set a relatively small column batch size for testing purposes
-    TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, 5)
+    ctx.setConf(SQLConf.COLUMN_BATCH_SIZE, 5)
     // Enable in-memory partition pruning for testing purposes
-    TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true)
+    ctx.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true)
   }
 
   override def afterAll() {
-    TestHive.cacheTables = false
+    ctx.cacheTables = false
     TimeZone.setDefault(originalTimeZone)
     Locale.setDefault(originalLocale)
-    TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, originalColumnBatchSize)
-    TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning)
+    ctx.setConf(SQLConf.COLUMN_BATCH_SIZE, originalColumnBatchSize)
+    ctx.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning)
   }
 
   /** A list of tests deemed out of scope currently and thus completely disregarded. */
HiveWindowFunctionQuerySuite.scala

@@ -22,8 +22,6 @@ import java.util.{Locale, TimeZone}
 
 import org.scalatest.BeforeAndAfter
 
-import org.apache.spark.sql.hive.test.TestHive
-import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.util.Utils
 
 /**
@@ -33,12 +31,14 @@ import org.apache.spark.util.Utils
  * files, every `createQueryTest` calls should explicitly set `reset` to `false`.
  */
 class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfter {
+  import ctx._
+
   private val originalTimeZone = TimeZone.getDefault
   private val originalLocale = Locale.getDefault
   private val testTempDir = Utils.createTempDir()
 
   override def beforeAll() {
-    TestHive.cacheTables = true
+    ctx.cacheTables = true
     // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*)
     TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
     // Add Locale setting
@@ -59,7 +59,7 @@ class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfter
         | p_retailprice DOUBLE,
         | p_comment STRING)
       """.stripMargin)
-    val testData1 = TestHive.getHiveFile("data/files/part_tiny.txt").getCanonicalPath
+    val testData1 = ctx.getHiveFile("data/files/part_tiny.txt").getCanonicalPath
     sql(
       s"""
         |LOAD DATA LOCAL INPATH '$testData1' overwrite into table part
@@ -83,7 +83,7 @@ class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfter
         |row format delimited
         |fields terminated by '|'
       """.stripMargin)
-    val testData2 = TestHive.getHiveFile("data/files/over1k").getCanonicalPath
+    val testData2 = ctx.getHiveFile("data/files/over1k").getCanonicalPath
     sql(
       s"""
        |LOAD DATA LOCAL INPATH '$testData2' overwrite into table over1k
@@ -100,10 +100,10 @@ class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfter
   }
 
   override def afterAll() {
-    TestHive.cacheTables = false
+    ctx.cacheTables = false
     TimeZone.setDefault(originalTimeZone)
     Locale.setDefault(originalLocale)
-    TestHive.reset()
+    ctx.reset()
   }
 
   /////////////////////////////////////////////////////////////////////////////
@@ -766,7 +766,7 @@ class HiveWindowFunctionQueryFileSuite
   private val testTempDir = Utils.createTempDir()
 
   override def beforeAll() {
-    TestHive.cacheTables = true
+    ctx.cacheTables = true
     // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*)
     TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
     // Add Locale setting
@@ -783,10 +783,10 @@ class HiveWindowFunctionQueryFileSuite
   }
 
   override def afterAll() {
-    TestHive.cacheTables = false
+    ctx.cacheTables = false
     TimeZone.setDefault(originalTimeZone)
     Locale.setDefault(originalLocale)
-    TestHive.reset()
+    ctx.reset()
  }
 
   override def blackList: Seq[String] = Seq(
TestHive.scala

@@ -42,30 +42,12 @@ import org.apache.spark.{SparkConf, SparkContext}
 
 /* Implicit conversions */
 import scala.collection.JavaConversions._
 
-// TODO: remove it
-object TestHive
-  extends TestHiveContext(
-    new SparkContext(
-      System.getProperty("spark.sql.test.master", "local[32]"),
-      "TestSQLContext",
-      new SparkConf()
-        .set("spark.sql.test", "")
-        .set("spark.sql.hive.metastore.barrierPrefixes",
-          "org.apache.spark.sql.hive.execution.PairSerDe")
-        .set("spark.buffer.pageSize", "4m")
-        // SPARK-8910
-        .set("spark.ui.enabled", "false")))
-
 /**
  * A locally running test instance of Spark's Hive execution engine.
  *
  * Data from [[testTables]] will be automatically loaded whenever a query is run over those tables.
  * Calling [[reset]] will delete all tables and other state in the database, leaving the database
  * in a "clean" state.
- *
- * TestHive is singleton object version of this class because instantiating multiple copies of the
- * hive metastore seems to lead to weird non-deterministic failures. Therefore, the execution of
- * test cases that rely on TestHive must be serialized.
  */
 class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
   self =>

Review thread on the removed TestHive singleton:

Contributor: Can we deprecate this instead of removing it? I think it's fairly common for people to use this for their own unit tests.

Author: Do you mean outside of Spark? The point is to never use this in Spark tests, since even referencing it will destabilize the tests because it leaves two overlapping SparkContexts in the same JVM.

Contributor: Yes, we can avoid it in our own tests, but I know for a fact that it is used externally.

Author: OK, I've added these back with a deprecation warning in the latest commit.
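The thread resolves with the singleton coming back behind a deprecation warning (commit 814df2f, "Add back singletons but deprecate them"). A minimal sketch of that shape, assuming Scala's standard @deprecated annotation; the exact message, version string, and retained configuration are not shown in this commit:

import org.apache.spark.{SparkConf, SparkContext}

// Hedged sketch: keep the singleton for external users while steering
// new tests toward per-suite TestHiveContext instances.
@deprecated("Create a TestHiveContext with your own SparkContext instead", "1.5.0")
object TestHive
  extends TestHiveContext(
    new SparkContext(
      System.getProperty("spark.sql.test.master", "local[32]"),
      "TestSQLContext",
      new SparkConf().set("spark.sql.test", "")))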
JavaDataFrameSuite.java

@@ -30,7 +30,7 @@
 import org.apache.spark.sql.*;
 import org.apache.spark.sql.expressions.Window;
 import org.apache.spark.sql.hive.HiveContext;
-import org.apache.spark.sql.hive.test.TestHive$;
+import org.apache.spark.sql.hive.test.TestHiveContext;
 
 public class JavaDataFrameSuite {
   private transient JavaSparkContext sc;
@@ -47,7 +47,7 @@ private void checkAnswer(DataFrame actual, List<Row> expected) {
 
   @Before
   public void setUp() throws IOException {
-    hc = TestHive$.MODULE$;
+    hc = new TestHiveContext();
     sc = new JavaSparkContext(hc.sparkContext());
 
     List<String> jsonObjects = new ArrayList<String>(10);
@@ -38,7 +38,7 @@
 import org.apache.spark.sql.QueryTest$;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.hive.HiveContext;
-import org.apache.spark.sql.hive.test.TestHive$;
+import org.apache.spark.sql.hive.test.TestHiveContext;
 import org.apache.spark.sql.types.DataTypes;
 import org.apache.spark.sql.types.StructField;
 import org.apache.spark.sql.types.StructType;
@@ -63,7 +63,7 @@ private void checkAnswer(DataFrame actual, List<Row> expected) {
 
   @Before
   public void setUp() throws IOException {
-    sqlContext = TestHive$.MODULE$;
+    sqlContext = new TestHiveContext();
     sc = new JavaSparkContext(sqlContext.sparkContext());
 
     originalDefaultSource = sqlContext.conf().defaultDataSourceName();
CachedTableSuite.scala

@@ -19,14 +19,15 @@ package org.apache.spark.sql.hive
 
 import java.io.File
 
-import org.apache.spark.sql.columnar.{InMemoryColumnarTableScan, InMemoryRelation}
-import org.apache.spark.sql.hive.test.TestHive
-import org.apache.spark.sql.hive.test.TestHive._
-import org.apache.spark.sql.{SaveMode, AnalysisException, DataFrame, QueryTest}
+import org.apache.spark.sql.{SaveMode, AnalysisException, QueryTest}
+import org.apache.spark.sql.columnar.InMemoryColumnarTableScan
+import org.apache.spark.sql.hive.test.MyTestHiveContext
 import org.apache.spark.storage.RDDBlockId
 import org.apache.spark.util.Utils
 
-class CachedTableSuite extends QueryTest {
+class CachedTableSuite extends QueryTest with MyTestHiveContext {
+  private val ctx = hiveContext
+  import ctx._
 
   def rddIdOf(tableName: String): Int = {
     val executedPlan = table(tableName).queryExecution.executedPlan
@@ -95,18 +96,18 @@
 
   test("correct error on uncache of non-cached table") {
     intercept[IllegalArgumentException] {
-      TestHive.uncacheTable("src")
+      uncacheTable("src")
     }
   }
 
   test("'CACHE TABLE' and 'UNCACHE TABLE' HiveQL statement") {
-    TestHive.sql("CACHE TABLE src")
+    sql("CACHE TABLE src")
     assertCached(table("src"))
-    assert(TestHive.isCached("src"), "Table 'src' should be cached")
+    assert(isCached("src"), "Table 'src' should be cached")
 
-    TestHive.sql("UNCACHE TABLE src")
+    sql("UNCACHE TABLE src")
     assertCached(table("src"), 0)
-    assert(!TestHive.isCached("src"), "Table 'src' should not be cached")
+    assert(!isCached("src"), "Table 'src' should not be cached")
   }
 
   test("CACHE TABLE tableName AS SELECT * FROM anotherTable") {
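The hunks above mix in a MyTestHiveContext trait whose definition is not part of this commit's visible diff. Judging from how the suites use it (a hiveContext member, no manual setup), it plausibly resembles the sketch below; everything here beyond the hiveContext name is an assumption:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext
import org.scalatest.{BeforeAndAfterAll, Suite}

// Hypothetical reconstruction, not the actual trait: give each suite
// its own HiveContext and stop the SparkContext when the suite ends.
trait MyTestHiveContext extends BeforeAndAfterAll { self: Suite =>

  // Lazy so the context is created on first use; the suites' eager
  // `private val ctx = hiveContext` then forces it at construction.
  protected lazy val hiveContext: HiveContext = {
    val conf = new SparkConf().setMaster("local[2]").setAppName(suiteName)
    new HiveContext(new SparkContext(conf))
  }

  override def afterAll(): Unit = {
    try hiveContext.sparkContext.stop()
    finally super.afterAll()
  }
}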
ErrorPositionSuite.scala

@@ -22,12 +22,14 @@ import scala.util.Try
 
 import org.scalatest.BeforeAndAfter
 
 import org.apache.spark.sql.catalyst.util.quietly
-import org.apache.spark.sql.hive.test.TestHive._
-import org.apache.spark.sql.hive.test.TestHive.implicits._
+import org.apache.spark.sql.hive.test.MyTestHiveContext
 import org.apache.spark.sql.{AnalysisException, QueryTest}
 
-class ErrorPositionSuite extends QueryTest with BeforeAndAfter {
+class ErrorPositionSuite extends QueryTest with BeforeAndAfter with MyTestHiveContext {
+  private val ctx = hiveContext
+  import ctx.implicits._
+  import ctx._
 
   before {
     Seq((1, 1, 1)).toDF("a", "a", "b").registerTempTable("dupAttributes")
HiveDataFrameAnalyticsSuite.scala

@@ -19,24 +19,25 @@ package org.apache.spark.sql.hive
 
 import org.apache.spark.sql.{DataFrame, QueryTest}
 import org.apache.spark.sql.functions._
-import org.apache.spark.sql.hive.test.TestHive
-import org.apache.spark.sql.hive.test.TestHive._
-import org.apache.spark.sql.hive.test.TestHive.implicits._
-import org.scalatest.BeforeAndAfterAll
+import org.apache.spark.sql.hive.test.MyTestHiveContext
 
 // TODO ideally we should put the test suite into the package `sql`, as
 // `hive` package is optional in compiling, however, `SQLContext.sql` doesn't
 // support the `cube` or `rollup` yet.
-class HiveDataFrameAnalyticsSuite extends QueryTest with BeforeAndAfterAll {
+class HiveDataFrameAnalyticsSuite extends QueryTest with MyTestHiveContext {
+  private val ctx = hiveContext
+  import ctx.implicits._
+  import ctx._
 
   private var testData: DataFrame = _
 
   override def beforeAll() {
     testData = Seq((1, 2), (2, 4)).toDF("a", "b")
-    TestHive.registerDataFrameAsTable(testData, "mytable")
+    registerDataFrameAsTable(testData, "mytable")
   }
 
   override def afterAll(): Unit = {
-    TestHive.dropTempTable("mytable")
+    dropTempTable("mytable")
   }
 
   test("rollup") {
HiveDataFrameJoinSuite.scala

@@ -18,10 +18,12 @@
 package org.apache.spark.sql.hive
 
 import org.apache.spark.sql.{Row, QueryTest}
-import org.apache.spark.sql.hive.test.TestHive.implicits._
+import org.apache.spark.sql.hive.test.MyTestHiveContext
 
 
-class HiveDataFrameJoinSuite extends QueryTest {
+class HiveDataFrameJoinSuite extends QueryTest with MyTestHiveContext {
+  private val ctx = hiveContext
+  import ctx.implicits._
 
   // We should move this into SQL package if we make case sensitivity configurable in SQL.
   test("join - self join auto resolve ambiguity with case insensitivity") {
HiveDataFrameWindowSuite.scala

@@ -20,10 +20,12 @@ package org.apache.spark.sql.hive
 import org.apache.spark.sql.{Row, QueryTest}
 import org.apache.spark.sql.expressions.Window
 import org.apache.spark.sql.functions._
-import org.apache.spark.sql.hive.test.TestHive._
-import org.apache.spark.sql.hive.test.TestHive.implicits._
+import org.apache.spark.sql.hive.test.MyTestHiveContext
 
-class HiveDataFrameWindowSuite extends QueryTest {
+class HiveDataFrameWindowSuite extends QueryTest with MyTestHiveContext {
+  private val ctx = hiveContext
+  import ctx.implicits._
+  import ctx._
 
   test("reuse window partitionBy") {
     val df = Seq((1, "1"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value")