Fix tests.
viirya committed May 11, 2018
commit 015e2ad739e5ad7fe6d1d1ef3c919661d8ac3d29
7 changes (6 additions & 1 deletion): sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2767,7 +2767,12 @@ class Dataset[T] private[sql](
   * @since 1.6.0
   */
  def count(): Long = withAction("count", groupBy().count().queryExecution) { plan =>
-    plan.executeCollect().head.getLong(0)
+    val collected = plan.executeCollect()
+    if (collected.isEmpty) {
+      0
+    } else {
+      collected.head.getLong(0)
+    }
Member Author (viirya):

spark.range(-10, -9, -20, 1).select("id").count in DataFrameRangeSuite causes an exception here: plan.executeCollect() returns no rows, so plan.executeCollect().head calls next on an empty iterator.
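A minimal repro sketch of that failure (not part of the diff; assumes an active SparkSession named spark):

// The range is empty (negative step with end > start), so executeCollect() returns no
// rows and the old plan.executeCollect().head threw NoSuchElementException.
val empty = spark.range(-10, -9, -20, 1).select("id")
assert(empty.count() == 0L)  // with the guard above this returns 0 instead of throwing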

Contributor:

I think it is caused by returning SinglePartition when there is no data (and therefore no partition). So I think we should fix it there and not here.
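A rough sketch of that alternative fix (hypothetical; it assumes RangeExec exposes numElements, numSlices, and an outputOrdering, and the actual change may look different):

// In RangeExec: only claim a concrete output partitioning when the range is non-empty,
// so an empty range no longer reports SinglePartition.
override def outputPartitioning: Partitioning = {
  if (numElements > 0) {
    if (numSlices == 1) SinglePartition else RangePartitioning(outputOrdering, numSlices)
  } else {
    UnknownPartitioning(numSlices)
  }
}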

Member Author (@viirya, May 11, 2018):

Right, that makes sense. Thanks.

}

/**
@@ -39,7 +39,9 @@ class ConfigBehaviorSuite extends QueryTest with SharedSQLContext {
    def computeChiSquareTest(): Double = {
      val n = 10000
      // Trigger a sort
-      val data = spark.range(0, n, 1, 1).sort('id.desc)
+      // Range has range partitioning in its output now. To have a range shuffle, we
+      // need to run a repartition first.
+      val data = spark.range(0, n, 1, 1).repartition(10).sort('id.desc)
Contributor:

Sorry, I am just curious: why is sort('id.desc) not causing a shuffle? Shouldn't the data be ordered by 'id.asc without the sort?

Member Author (viirya):

This test requires a range shuffle. Previously Range had unknown output partitioning/ordering, so a range shuffle was inserted before the sort.

Now Range has an ordered output, so the planner doesn't insert the shuffle we need here.
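For reference, one way to check whether a shuffle was planned (a sketch, not from the PR; assumes a SparkSession named spark):

import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
import org.apache.spark.sql.functions.desc

// Collect any exchange nodes from the executed plan; if Range's reported partitioning
// already satisfies the sort's required distribution, this comes back empty.
val sorted = spark.range(0, 10000, 1, 1).sort(desc("id"))
val shuffles = sorted.queryExecution.executedPlan.collect { case s: ShuffleExchangeExec => s }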

Contributor:

I'm also confused here. The Range output ordering is 'id.asc, which doesn't match 'id.desc; how can we avoid the shuffle?

Member Author (@viirya, May 17, 2018):

Because Range reports that it has just one partition now?

Contributor:

then can we change the code to spark.range(0, n, 1, 10)?

Member Author (viirya):

This test uses SQLConf.RANGE_EXCHANGE_SAMPLE_SIZE_PER_PARTITION to change the sample size per partition and then checks the chi-square value. It samples just 1 point, so the chi-square value is expected to be high.

If we change it from 1 partition to 10, the chi-square value will change too. Should we do this?
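For context, the relevant part of the test looks roughly like this (a sketch reconstructed from the discussion; the 100/300 thresholds are the ones mentioned further down):

// With default sampling the partitions come out even; with only 1 sample point per
// partition the range boundaries are poor and the chi-square statistic gets large.
assert(computeChiSquareTest() < 100)
withSQLConf(SQLConf.RANGE_EXCHANGE_SAMPLE_SIZE_PER_PARTITION.key -> "1") {
  assert(computeChiSquareTest() > 300)
}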

Contributor:

Hmm, isn't spark.range(0, n, 1, 10) almost the same as spark.range(0, n, 1, 1).repartition(10)?

Member Author (viirya):

This is a good point.

This is the query plan and partition sizes for spark.range(0, n, 1, 1).repartition(10).sort('id.desc) when SQLConf.RANGE_EXCHANGE_SAMPLE_SIZE_PER_PARTITION is set to 1:

== Physical Plan ==
*(2) Sort [id#15L DESC NULLS LAST], true, 0
+- Exchange rangepartitioning(id#15L DESC NULLS LAST, 4)
   +- Exchange RoundRobinPartitioning(10)
      +- *(1) Range (0, 10000, step=1, splits=1)

(1666, 3766, 2003, 2565)

And for spark.range(0, n, 1, 10).sort('id.desc):

== Physical Plan ==
*(2) Sort [id#13L DESC NULLS LAST], true, 0
+- Exchange rangepartitioning(id#13L DESC NULLS LAST, 4)
   +- *(1) Range (0, 10000, step=1, splits=10)

(2835, 2469, 2362, 2334)

Because repartition shuffles data with RoundRobinPartitioning, I guess it leads to worse sampling for the range exchange. Without the repartition, Range's output is already range-partitioned, so the sampling produces better range boundaries.

Contributor:

I see, so the 100 and 300 in this test are coupled with the physical execution. I feel the right way to test this is, instead of hardcoding 100 and 300, to have a and b and check whether b > 3 * a or something.
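A sketch of that idea (hypothetical, not part of the PR):

// Compare the two chi-square statistics relatively instead of against fixed thresholds.
val a = computeChiSquareTest()  // default sample size per partition
var b = 0.0
withSQLConf(SQLConf.RANGE_EXCHANGE_SAMPLE_SIZE_PER_PARTITION.key -> "1") {
  b = computeChiSquareTest()  // degraded sampling
}
assert(b > 3 * a)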

Member Author (viirya):

With spark.range(0, n, 1, 10).sort('id.desc), there is no 3x linear relation between a and b. As shown above, that also gives an even distribution, and its chi-square value is also under 100.

Here we need to redistribute the data to make the sampling difficult. Previously, a repartition was added automatically before the sort. Now Range has the correct output partitioning info, so the repartition must be added manually.

.selectExpr("SPARK_PARTITION_ID() pid", "id").as[(Int, Long)].collect()

// Compute histogram for the number of records per partition post sort
@@ -55,7 +55,9 @@ class WholeStageCodegenSuite extends QueryTest with SharedSQLContext {
    val plan = df.queryExecution.executedPlan
    assert(plan.find(p =>
      p.isInstanceOf[WholeStageCodegenExec] &&
-        p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec]).isDefined)
+        p.asInstanceOf[WholeStageCodegenExec].child.collect {
Contributor:

Same here, can we change the groupBy instead of the test? (See the sketch after this hunk.)

Member Author (viirya):

ok.

+          case h: HashAggregateExec => h
+        }.nonEmpty).isDefined)
assert(df.collect() === Array(Row(0, 1), Row(1, 1), Row(2, 1)))
}
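A sketch of the reviewer's groupBy suggestion above (hypothetical: the original df construction sits outside the shown hunk, and col("id") * 2 merely stands in for a derived grouping key):

import org.apache.spark.sql.functions.col

// Grouping on a derived key keeps the exchange, so a HashAggregateExec stays the direct
// child of a WholeStageCodegenExec and the original assertion could remain unchanged.
val df2 = spark.range(3).groupBy(col("id") * 2).count()
val plan2 = df2.queryExecution.executedPlan
assert(plan2.find(p =>
  p.isInstanceOf[WholeStageCodegenExec] &&
    p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec]).isDefined)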

@@ -34,14 +34,13 @@ class DebuggingSuite extends SparkFunSuite with SharedSQLContext {

test("debugCodegen") {
val res = codegenString(spark.range(10).groupBy("id").count().queryExecution.executedPlan)
Contributor (@cloud-fan, May 17, 2018):

Can we change it to groupBy('id * 2)? We should try our best to keep what we are testing, and keep the shuffle in this query. (See the sketch after this hunk.)

Member Author (viirya):

Ok.

-    assert(res.contains("Subtree 1 / 2"))
-    assert(res.contains("Subtree 2 / 2"))
+    assert(res.contains("Subtree 1 / 1"))
assert(res.contains("Object[]"))
}

test("debugCodegenStringSeq") {
val res = codegenStringSeq(spark.range(10).groupBy("id").count().queryExecution.executedPlan)
-    assert(res.length == 2)
+    assert(res.length == 1)
Contributor:

ditto

assert(res.forall{ case (subtree, code) =>
subtree.contains("Range") && code.contains("Object[]")})
}
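A sketch of the groupBy('id * 2) suggestion above (hypothetical; uses col("id") * 2 so it doesn't depend on the suite's implicits for the 'id syntax):

import org.apache.spark.sql.functions.col

// A derived grouping key still needs an exchange between the partial and final
// aggregates, so two codegen subtrees are kept and the original assertions can stay.
val res = codegenString(spark.range(10).groupBy(col("id") * 2).count().queryExecution.executedPlan)
assert(res.contains("Subtree 1 / 2"))
assert(res.contains("Subtree 2 / 2"))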