Closed

Changes from 1 commit (58 commits in total)

Commits
16ca7ac  It compiles!! (Aug 4, 2015)
5e6a20c  Refactor SQLTestUtils to reduce duplication (Aug 5, 2015)
d1d1449  Remove HiveTest singleton (Aug 5, 2015)
d4aafb1  Avoid the need to switch to HiveContexts (Aug 5, 2015)
6345cee  Clean up JsonSuite (Aug 5, 2015)
68ac6fe  Rename the test traits properly (Aug 5, 2015)
b15fdc6  Stop SparkContext in Java SQL tests (Aug 5, 2015)
0d74a72  Load test data early in case tables are accessed by name (Aug 5, 2015)
eee415d  Refactor implicits into SQLTestUtils (Aug 10, 2015)
55d0b1b  Fix Java not serializable exception in tests (Aug 10, 2015)
4f59bee  Fix DataSourceTest et al. (Aug 10, 2015)
88d4f16  Fix hive tests to use the same pattern (Aug 11, 2015)
5fe4bfb  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 11, 2015)
c4a22bc  Fix compile after resolving merge conflicts (Aug 11, 2015)
d9a8390  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 11, 2015)
f5619f8  Fix test compile after resolving merge conflicts (Aug 11, 2015)
9395cfa  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 11, 2015)
c51b3d8  Fix OuterJoinSuite (Aug 11, 2015)
997715e  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 11, 2015)
1400770  Fix style (Aug 11, 2015)
c92a3b0  Fix MiMa (Aug 11, 2015)
4debedf  Clean up inheritance in test util traits (Aug 11, 2015)
ca87dda  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 11, 2015)
1cf53ad  Create new context in SBT console by default (Aug 11, 2015)
bc5c999  Fix SemiJoinSuite (Aug 12, 2015)
19fd6c3  Fix InnerJoinSuite (Aug 12, 2015)
e1e513e  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
1e4c321  Address comments (Aug 12, 2015)
54848e2  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
94f9c77  Revert the removal of some BeforeAndAfters (Aug 12, 2015)
bec7d28  Fix a hive test + minor format updates (Aug 12, 2015)
24c086d  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
561eacc  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
aaba277  Fix places where we override before / after alls (Aug 12, 2015)
aac8f7f  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
40959bb  Fix test (Aug 12, 2015)
55d6624  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 12, 2015)
4ed58c8  Revert a few merge-conflict-induced unintentional changes (Aug 12, 2015)
0ce5638  Minor updates (Aug 13, 2015)
814df2f  Add back singletons but deprecate them (Aug 13, 2015)
099c74f  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
451fa37  Use consistent name for added back singletons (Aug 13, 2015)
9ea7f7c  Fix style (Aug 13, 2015)
ddc4b05  Revert "Use consistent name for added back singletons" (Aug 13, 2015)
ece3a81  Remove TestSQLContext, but keep TestHive (Aug 13, 2015)
94cc3b5  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
8d69bf8  Fix test (Aug 13, 2015)
7a0b1ef  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
b58ae73  Fix tests (Aug 13, 2015)
828144f  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
c4d44c9  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
48af8e4  Fix another before / after alls (Aug 13, 2015)
0606c82  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
f599bbc  Revert all Hive related changes (Aug 13, 2015)
aed7fc7  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
d85a6d8  Add a shorthand for sqlContext.sql / ctx.sql (Aug 13, 2015)
0b60325  Fix hive test compile (Aug 13, 2015)
821ea67  Merge branch 'master' of github.com:apache/spark into sql-tests-refactor (Aug 13, 2015)
Merge branch 'master' of github.com:apache/spark into sql-tests-refactor
Conflicts:
	sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala
	sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompatibilityTest.scala
	sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
	sql/hive/src/test/scala/org/apache/spark/sql/hive/ParquetHiveCompatibilitySuite.scala
Andrew Or committed Aug 12, 2015
commit 24c086d7713dfec665189557204490f89bd05e87
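What follows is the per-file diff for this merge commit. The common thread across the files is the move away from the TestSQLContext singleton: each suite mixes in SharedSQLContext and reaches the context through an accessor (ctx), with a sql(...) shorthand added later on this branch (commit d85a6d8). As a rough sketch of what such a trait can look like, written against 1.5-era Spark and ScalaTest APIs rather than quoting the real org.apache.spark.sql.test.SharedSQLContext:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.scalatest.{BeforeAndAfterAll, Suite}

// Illustrative sketch only: the real SharedSQLContext is wired differently.
trait SharedSQLContextSketch extends BeforeAndAfterAll { this: Suite =>

  private var _ctx: SQLContext = _

  // Suites reach the context through an accessor instead of a global
  // singleton, so each suite owns its context's lifecycle.
  protected def ctx: SQLContext = _ctx

  // Shorthand so tests can write sql("...") instead of ctx.sql("...").
  protected def sql(query: String): DataFrame = ctx.sql(query)

  override protected def beforeAll(): Unit = {
    super.beforeAll()
    _ctx = new SQLContext(new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName(suiteName)))
  }

  override protected def afterAll(): Unit = {
    try {
      if (_ctx != null) {
        _ctx.sparkContext.stop()
        _ctx = null
      }
    } finally {
      super.afterAll() // same try/finally discipline the diffs below adopt
    }
  }
}

A suite then declares itself as, say, class MySuite extends SparkFunSuite with SharedSQLContext and uses ctx directly, which is exactly the shape the first file below takes.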
TungstenAggregationIteratorSuite.scala

@@ -21,15 +21,12 @@
 import org.apache.spark._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection
 import org.apache.spark.sql.execution.metric.SQLMetrics
-import org.apache.spark.sql.test.TestSQLContext
+import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.unsafe.memory.TaskMemoryManager

-class TungstenAggregationIteratorSuite extends SparkFunSuite {
+class TungstenAggregationIteratorSuite extends SparkFunSuite with SharedSQLContext {

   test("memory acquired on construction") {
-    // set up environment
-    val ctx = TestSQLContext
-
     val taskMemoryManager = new TaskMemoryManager(SparkEnv.get.executorMemoryManager)
     val taskContext = new TaskContextImpl(0, 0, 0, 0, taskMemoryManager, null, Seq.empty)
     TaskContext.setTaskContext(taskContext)
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala

@@ -28,11 +28,12 @@
 import org.apache.hadoop.fs.Path
 import org.apache.parquet.avro.AvroParquetWriter

 import org.apache.spark.sql.Row
-import org.apache.spark.sql.execution.datasources.parquet.test.avro.{Nested, ParquetAvroCompat}
+import org.apache.spark.sql.execution.datasources.parquet.test.avro._
 import org.apache.spark.sql.test.SharedSQLContext

 class ParquetAvroCompatibilitySuite extends ParquetCompatibilityTest with SharedSQLContext {
   import ParquetCompatibilityTest._
+  import testImplicits._

   override protected def beforeAll(): Unit = {
     super.beforeAll()
@@ -45,6 +46,13 @@
     writer.close()
   }

+  private def withWriter[T <: IndexedRecord]
+      (path: String, schema: Schema)
+      (f: AvroParquetWriter[T] => Unit): Unit = {
+    val writer = new AvroParquetWriter[T](new Path(path), schema)
+    try f(writer) finally writer.close()
+  }
+
   test("Read Parquet file generated by parquet-avro") {
     withTempPath { dir =>
       val path = dir.getCanonicalPath
@@ -131,8 +139,6 @@
   }

   test("SPARK-9407 Don't push down predicates involving Parquet ENUM columns") {
-    import sqlContext.implicits._
-
     withTempPath { dir =>
       val path = dir.getCanonicalPath

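The withWriter helper added above is the loan pattern: acquire the writer, lend it to the body, and close it even when the body throws. The same shape generalizes to any closeable resource; here is a sketch of that generalization (the helper name, object wrapper, and usage are illustrative, not code from this PR):

object LoanPattern {
  // Generic loan-pattern helper in the spirit of withWriter above.
  // java.io.Closeable (which AvroParquetWriter implements) extends
  // java.lang.AutoCloseable, so this bound covers Parquet writers too.
  def withResource[T <: AutoCloseable, R](resource: T)(f: T => R): R = {
    try f(resource) finally resource.close()
  }

  // Hypothetical usage: the resource is closed whether or not the body throws.
  def demo(): Unit =
    withResource(new java.io.FileWriter("out.txt")) { w => w.write("hello") }
}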
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompatibilityTest.scala

@@ -26,6 +26,7 @@
 import org.apache.parquet.hadoop.ParquetFileReader
 import org.apache.parquet.schema.MessageType

 import org.apache.spark.sql.QueryTest
+import org.apache.spark.util.Utils

 /**
  * Helper class for testing Parquet compatibility.
@@ -49,15 +50,18 @@
   }

   override protected def afterAll(): Unit = {
-    Utils.deleteRecursively(parquetStore)
-    super.afterAll()
+    try {
+      Utils.deleteRecursively(parquetStore)
+    } finally {
+      super.afterAll()
+    }
   }

-  def readParquetSchema(path: String): MessageType = {
+  protected def readParquetSchema(path: String): MessageType = {
     readParquetSchema(path, { path => !path.getName.startsWith("_") })
   }

-  def readParquetSchema(path: String, pathFilter: Path => Boolean): MessageType = {
+  protected def readParquetSchema(path: String, pathFilter: Path => Boolean): MessageType = {
     val fsPath = new Path(path)
     val fs = fsPath.getFileSystem(configuration)
     val parquetFiles = fs.listStatus(fsPath, new PathFilter {
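The afterAll change above fixes a real failure mode: if Utils.deleteRecursively throws, the old version never reached super.afterAll(), leaking the shared context. A small self-contained illustration of the ordering guarantee the new code relies on (not PR code):

object FinallyOrdering extends App {
  def cleanup(): Unit = throw new RuntimeException("delete failed")
  def teardown(): Unit = println("context stopped")

  try {
    // The finally block runs even though cleanup() throws, and the
    // exception still propagates afterwards.
    try cleanup() finally teardown() // prints "context stopped" first
  } catch {
    case e: RuntimeException => println(s"cleanup error still surfaced: ${e.getMessage}")
  }
}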
sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala

@@ -26,6 +26,8 @@
 import org.scalatest.BeforeAndAfter

 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.sql.execution.PhysicalRDD
+import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
@@ -152,7 +154,7 @@
     conn.prepareStatement("insert into test.decimals values (12345.67, 1234)").executeUpdate()
     conn.prepareStatement("insert into test.decimals values (34567.89, 1428)").executeUpdate()
     conn.commit()
-    sql(
+    ctx.sql(
       s"""
         |CREATE TEMPORARY TABLE decimals
         |USING org.apache.spark.sql.jdbc
@@ -461,7 +463,7 @@

   test("SPARK-9182: filters are not passed through to jdbc source") {
     def checkPushedFilter(query: String, filterStr: String): Unit = {
-      val rddOpt = sql(query).queryExecution.executedPlan.collectFirst {
+      val rddOpt = ctx.sql(query).queryExecution.executedPlan.collectFirst {
         case PhysicalRDD(_, rdd: JDBCRDD, _) => rdd
       }
       assert(rddOpt.isDefined)
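checkPushedFilter above verifies predicate pushdown by pattern-matching the executed physical plan instead of parsing SQL text. Factored out, the technique looks roughly like this; the sketch assumes the same 1.5-era APIs the suite imports, and the package declaration matters because JDBCRDD is private[sql]:

package org.apache.spark.sql.test // inside the sql tree so private[sql] types resolve

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.execution.PhysicalRDD
import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD

object PlanInspection {
  // Walk the executed physical plan and return the first JDBC scan, if any,
  // so a test can assert on whether filters actually reached the source.
  def findJdbcScan(ctx: SQLContext, query: String): Option[JDBCRDD] =
    ctx.sql(query).queryExecution.executedPlan.collectFirst {
      case PhysicalRDD(_, rdd: JDBCRDD, _) => rdd
    }
}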
sql/hive/src/test/scala/org/apache/spark/sql/hive/ParquetHiveCompatibilitySuite.scala

@@ -30,56 +30,64 @@
    * Set the staging directory (and hence path to ignore Parquet files under)
    * to that set by [[HiveConf.ConfVars.STAGINGDIR]].
    */
-  override val stagingDir: Option[String] =
-    Some(new HiveConf().getVar(HiveConf.ConfVars.STAGINGDIR))
-
-  override protected def beforeAll(): Unit = {
-    super.beforeAll()
-
-    withSQLConf(HiveContext.CONVERT_METASTORE_PARQUET.key -> "false") {
-      withTempTable("data") {
-        ctx.sql(
-          s"""CREATE TABLE parquet_compat(
-             | bool_column BOOLEAN,
-             | byte_column TINYINT,
-             | short_column SMALLINT,
-             | int_column INT,
-             | long_column BIGINT,
-             | float_column FLOAT,
-             | double_column DOUBLE,
-             |
-             | strings_column ARRAY<STRING>,
-             | int_to_string_column MAP<INT, STRING>
-             |)
-             |STORED AS PARQUET
-             |LOCATION '${parquetStore.getCanonicalPath}'
-           """.stripMargin)
-
-        val schema = ctx.table("parquet_compat").schema
-        val rowRDD = ctx.sparkContext.parallelize(makeRows).coalesce(1)
-        ctx.createDataFrame(rowRDD, schema).registerTempTable("data")
-        ctx.sql("INSERT INTO TABLE parquet_compat SELECT * FROM data")
-      }
-    }
-  }
+  private val stagingDir = new HiveConf().getVar(HiveConf.ConfVars.STAGINGDIR)

   override protected def afterAll(): Unit = {
-    ctx.sql("DROP TABLE parquet_compat")
+    try {
+      ctx.sql("DROP TABLE parquet_compat")
+    } finally {
+      super.afterAll()
+    }
   }

   test("Read Parquet file generated by parquet-hive") {
+    withTable("parquet_compat") {
+      withTempPath { dir =>
+        val path = dir.getCanonicalPath
+        withSQLConf(HiveContext.CONVERT_METASTORE_PARQUET.key -> "false") {
+          withTempTable("data") {
+            ctx.sql(
+              s"""CREATE TABLE parquet_compat(
+                 | bool_column BOOLEAN,
+                 | byte_column TINYINT,
+                 | short_column SMALLINT,
+                 | int_column INT,
+                 | long_column BIGINT,
+                 | float_column FLOAT,
+                 | double_column DOUBLE,
+                 |
+                 | strings_column ARRAY<STRING>,
+                 | int_to_string_column MAP<INT, STRING>
+                 |)
+                 |STORED AS PARQUET
+                 |LOCATION '$path'
+               """.stripMargin)
+
+            val schema = ctx.table("parquet_compat").schema
+            val rowRDD = ctx.sparkContext.parallelize(makeRows).coalesce(1)
+            ctx.createDataFrame(rowRDD, schema).registerTempTable("data")
+            ctx.sql("INSERT INTO TABLE parquet_compat SELECT * FROM data")
+          }
+        }
+
+        val schema = readParquetSchema(path, { path =>
+          !path.getName.startsWith("_") && !path.getName.startsWith(stagingDir)
+        })

-    // Unfortunately parquet-hive doesn't add `UTF8` annotation to BINARY when writing strings.
-    // Have to assume all BINARY values are strings here.
-    withSQLConf(SQLConf.PARQUET_BINARY_AS_STRING.key -> "true") {
-      checkAnswer(ctx.read.parquet(parquetStore.getCanonicalPath), makeRows)
+        logInfo(
+          s"""Schema of the Parquet file written by parquet-hive:
+             |$schema
+           """.stripMargin)
+
+        // Unfortunately parquet-hive doesn't add `UTF8` annotation to BINARY when writing strings.
+        // Have to assume all BINARY values are strings here.
+        withSQLConf(SQLConf.PARQUET_BINARY_AS_STRING.key -> "true") {
+          checkAnswer(ctx.read.parquet(path), makeRows)
+        }
+      }
     }
   }

-  def makeRows: Seq[Row] = {
+  private def makeRows: Seq[Row] = {
     (0 until 10).map { i =>
       def nullable[T <: AnyRef]: ( => T) => T = makeNullable[T](i)
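The restructuring above trades a fixed parquetStore directory plus manual beforeAll/afterAll bookkeeping for self-cleaning withTable / withTempPath / withTempTable scopes, so a failing test cannot leave tables or files behind. For reference, a withTempPath-style helper can be as small as the following sketch (an approximation; the real helpers live in SQLTestUtils, and org.apache.spark.util.Utils is spark-private, hence the package choice):

package org.apache.spark.sql.test // Utils below is private[spark]

import java.io.File

import org.apache.spark.util.Utils

trait TempPathSketch {
  // Hand the body a path that does not yet exist, then delete whatever
  // the body created there, pass or fail.
  protected def withTempPath(f: File => Unit): Unit = {
    val dir = Utils.createTempDir()
    dir.delete() // createTempDir makes the directory; tests want a fresh, absent path
    try f(dir) finally Utils.deleteRecursively(dir)
  }
}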
(Condensed view of this merge commit; not all changed files are shown.)