Skip to content

Commit 425c7c2

Browse files
petermaxlee authored and rxin committed
[SPARK-17008][SPARK-17009][SQL] Normalization and isolation in SQLQueryTestSuite.
## What changes were proposed in this pull request? This patch enhances SQLQueryTestSuite in two ways: 1. SPARK-17009: Use a new SparkSession for each test case to provide stronger isolation (e.g. config changes in one test case do not impact another). That said, we do not currently isolate catalog changes. 2. SPARK-17008: Normalize query output using sorting, inspired by HiveComparisonTest. I also ported a few new test cases over from SQLQuerySuite. ## How was this patch tested? This is a test harness update. Author: petermaxlee <petermaxlee@gmail.com> Closes #14590 from petermaxlee/SPARK-17008.
1 parent ab648c0 commit 425c7c2

File tree

8 files changed

+180
-65
lines changed

8 files changed

+180
-65
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
-- date time functions
2+
3+
-- [SPARK-16836] current_date and current_timestamp literals
4+
select current_date = current_date(), current_timestamp = current_timestamp();
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
create temporary view hav as select * from values
2+
("one", 1),
3+
("two", 2),
4+
("three", 3),
5+
("one", 5)
6+
as hav(k, v);
7+
8+
-- having clause
9+
SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2;
10+
11+
-- having condition contains grouping column
12+
SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2;
13+
14+
-- SPARK-11032: resolve having correctly
15+
SELECT MIN(t.v) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(COUNT(1) > 0);
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
create temporary view nt1 as select * from values
2+
("one", 1),
3+
("two", 2),
4+
("three", 3)
5+
as nt1(k, v1);
6+
7+
create temporary view nt2 as select * from values
8+
("one", 1),
9+
("two", 22),
10+
("one", 5)
11+
as nt2(k, v2);
12+
13+
14+
SELECT * FROM nt1 natural join nt2 where k = "one";
15+
16+
SELECT * FROM nt1 natural left join nt2 order by v1, v2;
17+
18+
SELECT * FROM nt1 natural right join nt2 order by v1, v2;
19+
20+
SELECT count(*) FROM nt1 natural full outer join nt2;
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite
2+
-- Number of queries: 1
3+
4+
5+
-- !query 0
6+
select current_date = current_date(), current_timestamp = current_timestamp()
7+
-- !query 0 schema
8+
struct<(current_date() = current_date()):boolean,(current_timestamp() = current_timestamp()):boolean>
9+
-- !query 0 output
10+
true true
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite
2+
-- Number of queries: 4
3+
4+
5+
-- !query 0
6+
create temporary view hav as select * from values
7+
("one", 1),
8+
("two", 2),
9+
("three", 3),
10+
("one", 5)
11+
as hav(k, v)
12+
-- !query 0 schema
13+
struct<>
14+
-- !query 0 output
15+
16+
17+
18+
-- !query 1
19+
SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2
20+
-- !query 1 schema
21+
struct<k:string,sum(v):bigint>
22+
-- !query 1 output
23+
one 6
24+
three 3
25+
26+
27+
-- !query 2
28+
SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2
29+
-- !query 2 schema
30+
struct<count(k):bigint>
31+
-- !query 2 output
32+
1
33+
34+
35+
-- !query 3
36+
SELECT MIN(t.v) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(COUNT(1) > 0)
37+
-- !query 3 schema
38+
struct<min(v):int>
39+
-- !query 3 output
40+
1
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite
2+
-- Number of queries: 6
3+
4+
5+
-- !query 0
6+
create temporary view nt1 as select * from values
7+
("one", 1),
8+
("two", 2),
9+
("three", 3)
10+
as nt1(k, v1)
11+
-- !query 0 schema
12+
struct<>
13+
-- !query 0 output
14+
15+
16+
17+
-- !query 1
18+
create temporary view nt2 as select * from values
19+
("one", 1),
20+
("two", 22),
21+
("one", 5)
22+
as nt2(k, v2)
23+
-- !query 1 schema
24+
struct<>
25+
-- !query 1 output
26+
27+
28+
29+
-- !query 2
30+
SELECT * FROM nt1 natural join nt2 where k = "one"
31+
-- !query 2 schema
32+
struct<k:string,v1:int,v2:int>
33+
-- !query 2 output
34+
one 1 1
35+
one 1 5
36+
37+
38+
-- !query 3
39+
SELECT * FROM nt1 natural left join nt2 order by v1, v2
40+
-- !query 3 schema
41+
struct<k:string,v1:int,v2:int>
42+
-- !query 3 output
43+
one 1 1
44+
one 1 5
45+
two 2 22
46+
three 3 NULL
47+
48+
49+
-- !query 4
50+
SELECT * FROM nt1 natural right join nt2 order by v1, v2
51+
-- !query 4 schema
52+
struct<k:string,v1:int,v2:int>
53+
-- !query 4 output
54+
one 1 1
55+
one 1 5
56+
two 2 22
57+
58+
59+
-- !query 5
60+
SELECT count(*) FROM nt1 natural full outer join nt2
61+
-- !query 5 schema
62+
struct<count(1):bigint>
63+
-- !query 5 output
64+
4

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
3838

3939
setupTestData()
4040

41-
test("having clause") {
42-
withTempView("hav") {
43-
Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v")
44-
.createOrReplaceTempView("hav")
45-
checkAnswer(
46-
sql("SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2"),
47-
Row("one", 6) :: Row("three", 3) :: Nil)
48-
}
49-
}
50-
51-
test("having condition contains grouping column") {
52-
withTempView("hav") {
53-
Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v")
54-
.createOrReplaceTempView("hav")
55-
checkAnswer(
56-
sql("SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2"),
57-
Row(1) :: Nil)
58-
}
59-
}
60-
6141
test("SPARK-8010: promote numeric to string") {
6242
val df = Seq((1, 1)).toDF("key", "value")
6343
df.createOrReplaceTempView("src")
@@ -1969,15 +1949,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
19691949
}
19701950
}
19711951

1972-
test("SPARK-11032: resolve having correctly") {
1973-
withTempView("src") {
1974-
Seq(1 -> "a").toDF("i", "j").createOrReplaceTempView("src")
1975-
checkAnswer(
1976-
sql("SELECT MIN(t.i) FROM (SELECT * FROM src WHERE i > 0) t HAVING(COUNT(1) > 0)"),
1977-
Row(1))
1978-
}
1979-
}
1980-
19811952
test("SPARK-11303: filter should not be pushed down into sample") {
19821953
val df = spark.range(100)
19831954
List(true, false).foreach { withReplacement =>
@@ -2517,30 +2488,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
25172488
}
25182489
}
25192490

2520-
test("natural join") {
2521-
val df1 = Seq(("one", 1), ("two", 2), ("three", 3)).toDF("k", "v1")
2522-
val df2 = Seq(("one", 1), ("two", 22), ("one", 5)).toDF("k", "v2")
2523-
withTempView("nt1", "nt2") {
2524-
df1.createOrReplaceTempView("nt1")
2525-
df2.createOrReplaceTempView("nt2")
2526-
checkAnswer(
2527-
sql("SELECT * FROM nt1 natural join nt2 where k = \"one\""),
2528-
Row("one", 1, 1) :: Row("one", 1, 5) :: Nil)
2529-
2530-
checkAnswer(
2531-
sql("SELECT * FROM nt1 natural left join nt2 order by v1, v2"),
2532-
Row("one", 1, 1) :: Row("one", 1, 5) :: Row("two", 2, 22) :: Row("three", 3, null) :: Nil)
2533-
2534-
checkAnswer(
2535-
sql("SELECT * FROM nt1 natural right join nt2 order by v1, v2"),
2536-
Row("one", 1, 1) :: Row("one", 1, 5) :: Row("two", 2, 22) :: Nil)
2537-
2538-
checkAnswer(
2539-
sql("SELECT count(*) FROM nt1 natural full outer join nt2"),
2540-
Row(4) :: Nil)
2541-
}
2542-
}
2543-
25442491
test("join with using clause") {
25452492
val df1 = Seq(("r1c1", "r1c2", "t1r1c3"),
25462493
("r2c1", "r2c2", "t1r2c3"), ("r3c1x", "r3c2", "t1r3c3")).toDF("c1", "c2", "c3")
@@ -2991,13 +2938,4 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
29912938
data.selectExpr("`part.col1`", "`col.1`"))
29922939
}
29932940
}
2994-
2995-
test("current_date and current_timestamp literals") {
2996-
// NOTE that I am comparing the result of the literal with the result of the function call.
2997-
// This is done to prevent the test from failing because we are comparing a result to an out
2998-
// dated timestamp (quite likely) or date (very unlikely - but equally annoying).
2999-
checkAnswer(
3000-
sql("select current_date = current_date(), current_timestamp = current_timestamp()"),
3001-
Seq(Row(true, true)))
3002-
}
30032941
}

sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,12 @@ package org.apache.spark.sql
2020
import java.io.File
2121
import java.util.{Locale, TimeZone}
2222

23+
import org.apache.spark.sql.catalyst.planning.PhysicalOperation
24+
import org.apache.spark.sql.catalyst.plans.logical._
2325
import org.apache.spark.sql.catalyst.rules.RuleExecutor
2426
import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile}
2527
import org.apache.spark.sql.test.SharedSQLContext
28+
import org.apache.spark.sql.types.StructType
2629

2730
/**
2831
* End-to-end test cases for SQL queries.
@@ -126,14 +129,18 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
126129
cleaned.split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq
127130
}
128131

132+
// Create a local SparkSession to have stronger isolation between different test cases.
133+
// This does not isolate catalog changes.
134+
val localSparkSession = spark.newSession()
135+
129136
// Run the SQL queries preparing them for comparison.
130137
val outputs: Seq[QueryOutput] = queries.map { sql =>
131-
val df = spark.sql(sql)
138+
val (schema, output) = getNormalizedResult(localSparkSession, sql)
132139
// We might need to do some query canonicalization in the future.
133140
QueryOutput(
134141
sql = sql,
135-
schema = df.schema.catalogString,
136-
output = df.queryExecution.hiveResultString().mkString("\n"))
142+
schema = schema.catalogString,
143+
output = output.mkString("\n"))
137144
}
138145

139146
if (regenerateGoldenFiles) {
@@ -176,6 +183,23 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
176183
}
177184
}
178185

186+
/** Executes a query and returns the result as (schema of the output, normalized output). */
187+
private def getNormalizedResult(session: SparkSession, sql: String): (StructType, Seq[String]) = {
188+
// Returns true if the plan is supposed to be sorted.
189+
def isSorted(plan: LogicalPlan): Boolean = plan match {
190+
case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false
191+
case PhysicalOperation(_, _, Sort(_, true, _)) => true
192+
case _ => plan.children.iterator.exists(isSorted)
193+
}
194+
195+
val df = session.sql(sql)
196+
val schema = df.schema
197+
val answer = df.queryExecution.hiveResultString()
198+
199+
// If the output is not pre-sorted, sort it.
200+
if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted)
201+
}
202+
179203
private def listTestCases(): Seq[TestCase] = {
180204
listFilesRecursively(new File(inputFilePath)).map { file =>
181205
val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out"

0 commit comments

Comments
 (0)