-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-18873][SQL][TEST] New test cases for scalar subquery (part 1 of 2) - scalar subquery in SELECT clause #16712
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
b988651
069ed8f
edca333
64184fd
29f82b0
ac43ab4
631d396
7eb9b2d
1387cf5
3faa2d5
a308634
f1524b9
5c36dce
862b2b8
211e325
48ff3c7
0db0bc3
818df9e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,139 @@ | ||
| -- A test suite for scalar subquery in SELECT clause | ||
|
|
||
| -- t1: 12-row fixture; t1a values repeat across rows, and t1b, t1c and t1i each contain NULLs. | ||
| create temporary view t1 as select * from values | ||
| ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), | ||
| ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), | ||
| ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), | ||
| ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), | ||
| ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), | ||
| ("t1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), | ||
| ("t1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), | ||
| ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), | ||
| ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), | ||
| ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), | ||
| ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), | ||
| ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') | ||
| as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i); | ||
|
|
||
| -- t2: 13-row fixture; most t2a values reuse t1-style keys ("t1b", "t1c", "t1e", "t1f") so predicates such as t2a = t1a can match. NULLs appear in t2b, t2c and t2i. | ||
| create temporary view t2 as select * from values | ||
| ("t2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), | ||
| ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), | ||
| ("t1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), | ||
| ("t1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), | ||
| ("t1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), | ||
| ("t2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), | ||
| ("t1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), | ||
| ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), | ||
| ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), | ||
| ("t1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), | ||
| ("t1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), | ||
| ("t1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), | ||
| ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) | ||
| as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i); | ||
|
|
||
| -- t3: 12-row fixture; of the t1-style keys only "t1b" appears in t3a. NULLs appear in t3b, t3c and t3i. | ||
| create temporary view t3 as select * from values | ||
| ("t3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), | ||
| ("t3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), | ||
| ("t1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), | ||
| ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), | ||
| ("t1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), | ||
| ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), | ||
| ("t3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), | ||
| ("t3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), | ||
| ("t1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), | ||
| ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), | ||
| ("t3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), | ||
| ("t3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') | ||
| as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); | ||
|
|
||
| -- Group 1: scalar subquery in SELECT clause | ||
| -- no correlation | ||
| -- TC 01.01 | ||
| -- more than one scalar subquery in the same SELECT list; neither references t1, so the same pair of values is produced for every t1 row passing the filter | ||
| SELECT (SELECT min(t3d) FROM t3) min_t3d, | ||
| (SELECT max(t2h) FROM t2) max_t2h | ||
| FROM t1 | ||
| WHERE t1a = 't1c' | ||
| ; | ||
|
||
|
|
||
| -- TC 01.02 | ||
| -- uncorrelated scalar subquery as the only SELECT-list item of an IN subquery that also has GROUP BY and HAVING | ||
| SELECT t1a, count(*) | ||
| FROM t1 | ||
| WHERE t1c IN (SELECT (SELECT min(t3c) FROM t3) | ||
| FROM t2 | ||
| GROUP BY t2g | ||
| HAVING count(*) > 1) | ||
| GROUP BY t1a | ||
| ; | ||
|
|
||
| -- TC 01.03 | ||
| -- under a set op: each UNION branch supplies one scalar subquery and pads the other column with a null literal | ||
| SELECT (SELECT min(t3d) FROM t3) min_t3d, | ||
| null | ||
| FROM t1 | ||
| WHERE t1a = 't1c' | ||
| UNION | ||
| SELECT null, | ||
| (SELECT max(t2h) FROM t2) max_t2h | ||
| FROM t1 | ||
| WHERE t1a = 't1c' | ||
| ; | ||
|
|
||
| -- TC 01.04: uncorrelated scalar subqueries in both branches of an INTERSECT. NOTE(review): the aliases min_t3d / min_t2d name aggregates over t3c / t2c; the expected-output schema records min_t3d, so renaming them means regenerating the golden file. | ||
| SELECT (SELECT min(t3c) FROM t3) min_t3d | ||
| FROM t1 | ||
| WHERE t1a = 't1a' | ||
| INTERSECT | ||
| SELECT (SELECT min(t2c) FROM t2) min_t2d | ||
| FROM t1 | ||
| WHERE t1a = 't1d' | ||
| ; | ||
|
|
||
| -- TC 01.05: uncorrelated scalar subqueries in the SELECT lists of both derived tables of a FULL OUTER JOIN; their values are also compared in the ON clause | ||
| SELECT q1.t1a, q2.t2a, q1.min_t3d, q2.avg_t3d | ||
| FROM (SELECT t1a, (SELECT min(t3d) FROM t3) min_t3d | ||
| FROM t1 | ||
| WHERE t1a IN ('t1e', 't1c')) q1 | ||
| FULL OUTER JOIN | ||
| (SELECT t2a, (SELECT avg(t3d) FROM t3) avg_t3d | ||
| FROM t2 | ||
| WHERE t2a IN ('t1c', 't2a')) q2 | ||
| ON q1.t1a = q2.t2a | ||
| AND q1.min_t3d < q2.avg_t3d | ||
| ; | ||
|
|
||
| -- Group 2: scalar subquery in SELECT clause | ||
| -- with correlation | ||
| -- TC 02.01: two scalar subqueries in one SELECT list, each correlated to the outer row through t1.t1a | ||
| SELECT (SELECT min(t3d) FROM t3 WHERE t3.t3a = t1.t1a) min_t3d, | ||
| (SELECT max(t2h) FROM t2 WHERE t2.t2a = t1.t1a) max_t2h | ||
| FROM t1 | ||
| WHERE t1a = 't1b' | ||
| ; | ||
|
|
||
| -- TC 02.02: correlated scalar subquery (t3a = t1a) in the left branch of MINUS, uncorrelated scalar subquery in the right branch | ||
| SELECT (SELECT min(t3d) FROM t3 WHERE t3a = t1a) min_t3d | ||
| FROM t1 | ||
| WHERE t1a = 't1b' | ||
| MINUS | ||
| SELECT (SELECT min(t3d) FROM t3) abs_min_t3d | ||
| FROM t1 | ||
| WHERE t1a = 't1b' | ||
| ; | ||
|
|
||
| -- TC 02.03: the same scalar subquery appears in the SELECT list and in the WHERE clause of a NOT EXISTS subquery; it is correlated to the enclosing t3 through t3c, and the NOT EXISTS subquery is correlated to the outer t1 through t3a = t1a | ||
| SELECT t1a, t1b | ||
| FROM t1 | ||
| WHERE NOT EXISTS (SELECT (SELECT max(t2b) | ||
| FROM t2 LEFT JOIN t1 | ||
| ON t2a = t1a | ||
| WHERE t2c = t3c) dummy | ||
| FROM t3 | ||
| WHERE t3b < (SELECT max(t2b) | ||
| FROM t2 LEFT JOIN t1 | ||
| ON t2a = t1a | ||
| WHERE t2c = t3c) | ||
| AND t3a = t1a) | ||
|
|
||
|
||
| ; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,198 @@ | ||
| -- Automatically generated by SQLQueryTestSuite | ||
| -- Number of queries: 11 | ||
|
|
||
|
|
||
| -- !query 0 | ||
| create temporary view t1 as select * from values | ||
| ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), | ||
| ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), | ||
| ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), | ||
| ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), | ||
| ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), | ||
| ("t1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), | ||
| ("t1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), | ||
| ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), | ||
| ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), | ||
| ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), | ||
| ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), | ||
| ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') | ||
| as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) | ||
| -- !query 0 schema | ||
| struct<> | ||
| -- !query 0 output | ||
|
|
||
|
|
||
|
|
||
| -- !query 1 | ||
| create temporary view t2 as select * from values | ||
| ("t2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), | ||
| ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), | ||
| ("t1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), | ||
| ("t1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), | ||
| ("t1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), | ||
| ("t2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), | ||
| ("t1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), | ||
| ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), | ||
| ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), | ||
| ("t1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), | ||
| ("t1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), | ||
| ("t1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), | ||
| ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) | ||
| as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) | ||
| -- !query 1 schema | ||
| struct<> | ||
| -- !query 1 output | ||
|
|
||
|
|
||
|
|
||
| -- !query 2 | ||
| create temporary view t3 as select * from values | ||
| ("t3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), | ||
| ("t3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), | ||
| ("t1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), | ||
| ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), | ||
| ("t1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), | ||
| ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), | ||
| ("t3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), | ||
| ("t3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), | ||
| ("t1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), | ||
| ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), | ||
| ("t3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), | ||
| ("t3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') | ||
| as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) | ||
| -- !query 2 schema | ||
| struct<> | ||
| -- !query 2 output | ||
|
|
||
|
|
||
|
|
||
| -- !query 3 | ||
| SELECT (SELECT min(t3d) FROM t3) min_t3d, | ||
| (SELECT max(t2h) FROM t2) max_t2h | ||
| FROM t1 | ||
| WHERE t1a = 't1c' | ||
| -- !query 3 schema | ||
| struct<min_t3d:bigint,max_t2h:timestamp> | ||
| -- !query 3 output | ||
| 10 2017-05-04 01:01:00 | ||
|
|
||
|
|
||
| -- !query 4 | ||
| SELECT t1a, count(*) | ||
| FROM t1 | ||
| WHERE t1c IN (SELECT (SELECT min(t3c) FROM t3) | ||
| FROM t2 | ||
| GROUP BY t2g | ||
| HAVING count(*) > 1) | ||
| GROUP BY t1a | ||
| -- !query 4 schema | ||
| struct<t1a:string,count(1):bigint> | ||
| -- !query 4 output | ||
| t1a 2 | ||
|
|
||
|
|
||
| -- !query 5 | ||
| SELECT (SELECT min(t3d) FROM t3) min_t3d, | ||
| null | ||
| FROM t1 | ||
| WHERE t1a = 't1c' | ||
| UNION | ||
| SELECT null, | ||
| (SELECT max(t2h) FROM t2) max_t2h | ||
| FROM t1 | ||
| WHERE t1a = 't1c' | ||
| -- !query 5 schema | ||
| struct<min_t3d:bigint,NULL:timestamp> | ||
| -- !query 5 output | ||
| 10 NULL | ||
| NULL 2017-05-04 01:01:00 | ||
|
|
||
|
|
||
| -- !query 6 | ||
| SELECT (SELECT min(t3c) FROM t3) min_t3d | ||
| FROM t1 | ||
| WHERE t1a = 't1a' | ||
| INTERSECT | ||
| SELECT (SELECT min(t2c) FROM t2) min_t2d | ||
| FROM t1 | ||
| WHERE t1a = 't1d' | ||
| -- !query 6 schema | ||
| struct<min_t3d:int> | ||
| -- !query 6 output | ||
| 12 | ||
|
|
||
|
|
||
| -- !query 7 | ||
| SELECT q1.t1a, q2.t2a, q1.min_t3d, q2.avg_t3d | ||
| FROM (SELECT t1a, (SELECT min(t3d) FROM t3) min_t3d | ||
| FROM t1 | ||
| WHERE t1a IN ('t1e', 't1c')) q1 | ||
| FULL OUTER JOIN | ||
| (SELECT t2a, (SELECT avg(t3d) FROM t3) avg_t3d | ||
| FROM t2 | ||
| WHERE t2a IN ('t1c', 't2a')) q2 | ||
| ON q1.t1a = q2.t2a | ||
| AND q1.min_t3d < q2.avg_t3d | ||
| -- !query 7 schema | ||
| struct<t1a:string,t2a:string,min_t3d:bigint,avg_t3d:double> | ||
| -- !query 7 output | ||
| NULL t2a NULL 200.83333333333334 | ||
| t1c t1c 10 200.83333333333334 | ||
| t1c t1c 10 200.83333333333334 | ||
| t1e NULL 10 NULL | ||
| t1e NULL 10 NULL | ||
| t1e NULL 10 NULL | ||
|
|
||
|
|
||
| -- !query 8 | ||
| SELECT (SELECT min(t3d) FROM t3 WHERE t3.t3a = t1.t1a) min_t3d, | ||
| (SELECT max(t2h) FROM t2 WHERE t2.t2a = t1.t1a) max_t2h | ||
| FROM t1 | ||
| WHERE t1a = 't1b' | ||
| -- !query 8 schema | ||
| struct<min_t3d:bigint,max_t2h:timestamp> | ||
| -- !query 8 output | ||
| 19 2017-05-04 01:01:00 | ||
|
|
||
|
|
||
| -- !query 9 | ||
| SELECT (SELECT min(t3d) FROM t3 WHERE t3a = t1a) min_t3d | ||
| FROM t1 | ||
| WHERE t1a = 't1b' | ||
| MINUS | ||
| SELECT (SELECT min(t3d) FROM t3) abs_min_t3d | ||
| FROM t1 | ||
| WHERE t1a = 't1b' | ||
| -- !query 9 schema | ||
| struct<min_t3d:bigint> | ||
| -- !query 9 output | ||
| 19 | ||
|
|
||
|
|
||
| -- !query 10 | ||
| SELECT t1a, t1b | ||
| FROM t1 | ||
| WHERE NOT EXISTS (SELECT (SELECT max(t2b) | ||
| FROM t2 LEFT JOIN t1 | ||
| ON t2a = t1a | ||
| WHERE t2c = t3c) dummy | ||
| FROM t3 | ||
| WHERE t3b < (SELECT max(t2b) | ||
| FROM t2 LEFT JOIN t1 | ||
| ON t2a = t1a | ||
| WHERE t2c = t3c) | ||
| AND t3a = t1a) | ||
| -- !query 10 schema | ||
| struct<t1a:string,t1b:smallint> | ||
| -- !query 10 output | ||
| t1a 16 | ||
| t1a 16 | ||
| t1a 6 | ||
| t1a 6 | ||
| t1c 8 | ||
| t1d 10 | ||
| t1d NULL | ||
| t1d NULL | ||
| t1e 10 | ||
| t1e 10 | ||
| t1e 10 | ||
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the reason we use the column names as the values of `t3a`, `t2a` and `t1a`? It looks confusing when reading the queries.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No particular reason. I just followed the convention used in #16337 that you reviewed and merged. Please suggest a pattern if you want to have this changed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, please change it to something like `val3a`. It will be easy for reviewers to review the changes if you just change the prefix.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I missed this issue in #16337