From 4d99c11802efa2d6ee5c36de5941226bf12e1a55 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 7 Jul 2017 12:03:34 +0800 Subject: [PATCH 1/3] support un-aliased subquery --- .../sql/catalyst/parser/AstBuilder.scala | 16 ++++++++------- .../catalyst/plans/logical/LogicalPlan.scala | 2 +- .../sql/catalyst/parser/PlanParserSuite.scala | 13 ------------ .../resources/sql-tests/inputs/group-by.sql | 2 +- .../test/resources/sql-tests/inputs/limit.sql | 2 +- .../sql-tests/inputs/string-functions.sql | 2 +- .../in-subquery/in-set-operations.sql | 2 +- .../negative-cases/invalid-correlation.sql | 2 +- .../scalar-subquery-predicate.sql | 2 +- .../test/resources/sql-tests/inputs/union.sql | 4 ++-- .../results/columnresolution-negative.sql.out | 16 +++++++-------- .../sql-tests/results/group-by.sql.out | 2 +- .../resources/sql-tests/results/limit.sql.out | 2 +- .../results/string-functions.sql.out | 6 +++--- .../in-subquery/in-set-operations.sql.out | 2 +- .../invalid-correlation.sql.out | 2 +- .../scalar-subquery-predicate.sql.out | 2 +- .../results/subquery/subquery-in-from.sql.out | 20 ++++--------------- .../resources/sql-tests/results/union.sql.out | 4 ++-- .../apache/spark/sql/CachedTableSuite.scala | 18 ++++++++--------- .../org/apache/spark/sql/SQLQuerySuite.scala | 13 ++++++++++++ .../org/apache/spark/sql/SubquerySuite.scala | 8 ++++---- 22 files changed, 66 insertions(+), 76 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index b6a4686bb9ec..814c1f662ec8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -751,15 +751,17 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * hooks. */ override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) { - // The unaliased subqueries in the FROM clause are disallowed. Instead of rejecting it in - // parser rules, we handle it here in order to provide better error message. - if (ctx.strictIdentifier == null) { - throw new ParseException("The unaliased subqueries in the FROM clause are not supported.", - ctx) + val alias = if (ctx.strictIdentifier == null) { + // For un-aliased subqueries, ues a default alias name that is not likely to conflict with + // normal subquery names, so that parent operators can only access the columns in subquery by + // unqualified names. Users can still use this special qualifier to access columns if they + // know it, but that's not recommended. + "_auto_generated_subquery_name" + } else { + ctx.strictIdentifier.getText } - aliasPlan(ctx.strictIdentifier, - plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample)) + SubqueryAlias(alias, plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample)) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 8649603b1a9f..9b440cd99f99 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -253,7 +253,7 @@ abstract class LogicalPlan // More than one match. case ambiguousReferences => - val referenceNames = ambiguousReferences.map(_._1).mkString(", ") + val referenceNames = ambiguousReferences.map(_._1.qualifiedName).mkString(", ") throw new AnalysisException( s"Reference '$name' is ambiguous, could be: $referenceNames.") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 5b2573fa4d60..6dad097041a1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -450,19 +450,6 @@ class PlanParserSuite extends AnalysisTest { | (select id from t0)) as u_1 """.stripMargin, plan.union(plan).union(plan).as("u_1").select('id)) - - } - - test("aliased subquery") { - val errMsg = "The unaliased subqueries in the FROM clause are not supported" - - assertEqual("select a from (select id as a from t0) tt", - table("t0").select('id.as("a")).as("tt").select('a)) - intercept("select a from (select id as a from t0)", errMsg) - - assertEqual("from (select id as a from t0) tt select a", - table("t0").select('id.as("a")).as("tt").select('a)) - intercept("from (select id as a from t0) select a", errMsg) } test("scalar sub-query") { diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index bc2120727dac..1e1384549a41 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -34,7 +34,7 @@ SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), FROM testData; -- Aggregate with foldable input and multiple distinct groups. -SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) t GROUP BY a; +SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a; -- Aliases in SELECT could be used in GROUP BY SELECT a AS k, COUNT(b) FROM testData GROUP BY k; diff --git a/sql/core/src/test/resources/sql-tests/inputs/limit.sql b/sql/core/src/test/resources/sql-tests/inputs/limit.sql index df555bdc1976..f21912a04271 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/limit.sql @@ -21,7 +21,7 @@ SELECT * FROM testdata LIMIT true; SELECT * FROM testdata LIMIT 'a'; -- limit within a subquery -SELECT * FROM (SELECT * FROM range(10) LIMIT 5) t WHERE id > 3; +SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3; -- limit ALL SELECT * FROM testdata WHERE key < 3 LIMIT ALL; diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 20c039066403..c95f4817b7ce 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -7,7 +7,7 @@ select 'a' || 'b' || 'c'; -- Check if catalyst combine nested `Concat`s EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col -FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) t; +FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)); -- replace function select replace('abc', 'b', '123'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql index 42f84e974871..5c371d2305ac 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql @@ -394,7 +394,7 @@ FROM (SELECT * FROM t1)) t4 WHERE t4.t2b IN (SELECT Min(t3b) FROM t3 - WHERE t4.t2a = t3a)) T; + WHERE t4.t2a = t3a)); -- UNION, UNION ALL, UNION DISTINCT, INTERSECT and EXCEPT for NOT IN -- TC 01.12 diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql index f3f0c7622ccd..e22cade93679 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql @@ -23,7 +23,7 @@ AND t2b = (SELECT max(avg) FROM (SELECT t2b, avg(t2b) avg FROM t2 WHERE t2a = t1.t1b - ) T + ) ) ; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql index dbe8d76d2f11..fb0d07fbdace 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql @@ -19,7 +19,7 @@ AND c.cv = (SELECT max(avg) FROM (SELECT c1.cv, avg(c1.cv) avg FROM c c1 WHERE c1.ck = p.pk - GROUP BY c1.cv) T); + GROUP BY c1.cv)); create temporary view t1 as select * from values ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/union.sql b/sql/core/src/test/resources/sql-tests/inputs/union.sql index 63bc044535e4..e57d69eaad03 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/union.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/union.sql @@ -5,7 +5,7 @@ CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (1.0, 1), (2.0, 4) tbl(c1, c2); SELECT * FROM (SELECT * FROM t1 UNION ALL - SELECT * FROM t1) T; + SELECT * FROM t1); -- Type Coerced Union SELECT * @@ -13,7 +13,7 @@ FROM (SELECT * FROM t1 UNION ALL SELECT * FROM t2 UNION ALL - SELECT * FROM t2) T; + SELECT * FROM t2); -- Regression test for SPARK-18622 SELECT a diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out index 9e60e592c2bd..b5a4f5c2bf65 100644 --- a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out @@ -72,7 +72,7 @@ SELECT i1 FROM t1, mydb1.t1 struct<> -- !query 8 output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 9 @@ -81,7 +81,7 @@ SELECT t1.i1 FROM t1, mydb1.t1 struct<> -- !query 9 output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 10 @@ -99,7 +99,7 @@ SELECT i1 FROM t1, mydb2.t1 struct<> -- !query 11 output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 12 @@ -108,7 +108,7 @@ SELECT t1.i1 FROM t1, mydb2.t1 struct<> -- !query 12 output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 13 @@ -125,7 +125,7 @@ SELECT i1 FROM t1, mydb1.t1 struct<> -- !query 14 output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 15 @@ -134,7 +134,7 @@ SELECT t1.i1 FROM t1, mydb1.t1 struct<> -- !query 15 output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 16 @@ -143,7 +143,7 @@ SELECT i1 FROM t1, mydb2.t1 struct<> -- !query 16 output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 17 @@ -152,7 +152,7 @@ SELECT t1.i1 FROM t1, mydb2.t1 struct<> -- !query 17 output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 18 diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index e23ebd4e822f..986bb01c13fe 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -134,7 +134,7 @@ struct -- !query 14 output diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index afdd6df2a571..146abe6cbd05 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -93,7 +93,7 @@ The limit expression must be integer type, but got string; -- !query 10 -SELECT * FROM (SELECT * FROM range(10) LIMIT 5) t WHERE id > 3 +SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3 -- !query 10 schema struct -- !query 10 output diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 52eb554edf89..a84ec4291998 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -30,20 +30,20 @@ abc -- !query 3 EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col -FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) t +FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) -- !query 3 schema struct -- !query 3 output == Parsed Logical Plan == 'Project [concat(concat(concat('col1, 'col2), 'col3), 'col4) AS col#x] -+- 'SubqueryAlias t ++- 'SubqueryAlias _auto_generated_subquery_name +- 'Project ['id AS col1#x, 'id AS col2#x, 'id AS col3#x, 'id AS col4#x] +- 'UnresolvedTableValuedFunction range, [10] == Analyzed Logical Plan == col: string Project [concat(concat(concat(cast(col1#xL as string), cast(col2#xL as string)), cast(col3#xL as string)), cast(col4#xL as string)) AS col#x] -+- SubqueryAlias t ++- SubqueryAlias _auto_generated_subquery_name +- Project [id#xL AS col1#xL, id#xL AS col2#xL, id#xL AS col3#xL, id#xL AS col4#xL] +- Range (0, 10, step=1, splits=None) diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out index 5780f49648ec..e06f9206d340 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out @@ -496,7 +496,7 @@ FROM (SELECT * FROM t1)) t4 WHERE t4.t2b IN (SELECT Min(t3b) FROM t3 - WHERE t4.t2a = t3a)) T + WHERE t4.t2a = t3a)) -- !query 13 schema struct -- !query 13 output diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index ca3930b33e06..e4b1a2dbc675 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -40,7 +40,7 @@ AND t2b = (SELECT max(avg) FROM (SELECT t2b, avg(t2b) avg FROM t2 WHERE t2a = t1.t1b - ) T + ) ) -- !query 3 schema struct<> diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out index 1d5dddca76a1..8b29300e71f9 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out @@ -39,7 +39,7 @@ AND c.cv = (SELECT max(avg) FROM (SELECT c1.cv, avg(c1.cv) avg FROM c c1 WHERE c1.ck = p.pk - GROUP BY c1.cv) T) + GROUP BY c1.cv)) -- !query 3 schema struct -- !query 3 output diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out index 14553557d1ff..50370df34916 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out @@ -37,26 +37,14 @@ struct -- !query 4 SELECT * FROM (SELECT * FROM testData) WHERE key = 1 -- !query 4 schema -struct<> +struct -- !query 4 output -org.apache.spark.sql.catalyst.parser.ParseException - -The unaliased subqueries in the FROM clause are not supported.(line 1, pos 14) - -== SQL == -SELECT * FROM (SELECT * FROM testData) WHERE key = 1 ---------------^^^ +1 1 -- !query 5 FROM (SELECT * FROM testData WHERE key = 1) SELECT * -- !query 5 schema -struct<> +struct -- !query 5 output -org.apache.spark.sql.catalyst.parser.ParseException - -The unaliased subqueries in the FROM clause are not supported.(line 1, pos 5) - -== SQL == -FROM (SELECT * FROM testData WHERE key = 1) SELECT * ------^^^ +1 1 diff --git a/sql/core/src/test/resources/sql-tests/results/union.sql.out b/sql/core/src/test/resources/sql-tests/results/union.sql.out index 865b3aed65d7..d123b7fdbe0c 100644 --- a/sql/core/src/test/resources/sql-tests/results/union.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/union.sql.out @@ -22,7 +22,7 @@ struct<> SELECT * FROM (SELECT * FROM t1 UNION ALL - SELECT * FROM t1) T + SELECT * FROM t1) -- !query 2 schema struct -- !query 2 output @@ -38,7 +38,7 @@ FROM (SELECT * FROM t1 UNION ALL SELECT * FROM t2 UNION ALL - SELECT * FROM t2) T + SELECT * FROM t2) -- !query 3 schema struct -- !query 3 output diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 506cc2548e26..33e29c704f53 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -631,13 +631,13 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext val ds2 = sql( """ - |SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1) tt + |SELECT * FROM (SELECT c1, max(c1) FROM t1 GROUP BY c1) |WHERE - |tt.c1 = (SELECT max(c1) FROM t2 GROUP BY c1) + |c1 = (SELECT max(c1) FROM t2 GROUP BY c1) |OR |EXISTS (SELECT c1 FROM t3) |OR - |tt.c1 IN (SELECT c1 FROM t4) + |c1 IN (SELECT c1 FROM t4) """.stripMargin) assert(getNumInMemoryRelations(ds2) == 4) } @@ -788,25 +788,25 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext // Scalar subquery and predicate subquery sql( """ - |SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1) tt + |SELECT * FROM (SELECT c1, max(c1) as c1 FROM t1 GROUP BY c1) |WHERE - |tt.c1 = (SELECT max(c1) FROM t2 GROUP BY c1) + |c1 = (SELECT max(c1) FROM t2 GROUP BY c1) |OR |EXISTS (SELECT c1 FROM t3) |OR - |tt.c1 IN (SELECT c1 FROM t4) + |c1 IN (SELECT c1 FROM t4) """.stripMargin).cache() val cachedDs2 = sql( """ - |SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1) tt + |SELECT * FROM (SELECT c1, max(c1) as c1 FROM t1 GROUP BY c1) |WHERE - |tt.c1 = (SELECT max(c1) FROM t2 GROUP BY c1) + |c1 = (SELECT max(c1) FROM t2 GROUP BY c1) |OR |EXISTS (SELECT c1 FROM t3) |OR - |tt.c1 IN (SELECT c1 FROM t4) + |c1 IN (SELECT c1 FROM t4) """.stripMargin) assert(getNumInMemoryRelations(cachedDs2) == 1) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 5171aaebc990..e5e8a4369155 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2638,4 +2638,17 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } } + + test("SPARK-21335: support un-aliased subquery") { + withTempView("v") { + Seq(1 -> "a").toDF("i", "j").createOrReplaceTempView("v") + checkAnswer(sql("SELECT i from (SELECT i FROM v)"), Row(1)) + + val e = intercept[AnalysisException](sql("SELECT v.i from (SELECT i FROM v)")) + assert(e.message == + "cannot resolve '`v.i`' given input columns: [_auto_generated_subquery_name.i]") + + checkAnswer(sql("SELECT _auto_generated_subquery_name.i from (SELECT i FROM v)"), Row(1)) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index c0a3b5add313..7bcb419e8df6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -112,7 +112,7 @@ class SubquerySuite extends QueryTest with SharedSQLContext { | with t4 as (select 1 as d, 3 as e) | select * from t4 cross join t2 where t2.b = t4.d | ) - | select a from (select 1 as a union all select 2 as a) t + | select a from (select 1 as a union all select 2 as a) | where a = (select max(d) from t3) """.stripMargin), Array(Row(1)) @@ -606,8 +606,8 @@ class SubquerySuite extends QueryTest with SharedSQLContext { | select cntPlusOne + 1 as cntPlusTwo from ( | select cnt + 1 as cntPlusOne from ( | select sum(r.c) s, count(*) cnt from r where l.a = r.c having cnt = 0 - | ) t1 - | ) t2 + | ) + | ) |) = 2""".stripMargin), Row(1) :: Row(1) :: Row(null) :: Row(null) :: Nil) } @@ -655,7 +655,7 @@ class SubquerySuite extends QueryTest with SharedSQLContext { """ | select c1 from onerow t1 | where exists (select 1 - | from (select 1 as c1 from onerow t2 LIMIT 1) t2 + | from (select c1 from onerow t2 LIMIT 1) t2 | where t1.c1=t2.c1)""".stripMargin), Row(1) :: Nil) } From 479e53c20303d55a04fd5e98440275332ebb3e5e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 7 Jul 2017 12:26:12 +0800 Subject: [PATCH 2/3] fix typo --- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../apache/spark/sql/CachedTableSuite.scala | 72 ++++++------------- 2 files changed, 22 insertions(+), 52 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 814c1f662ec8..a847c4c263e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -752,7 +752,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { */ override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) { val alias = if (ctx.strictIdentifier == null) { - // For un-aliased subqueries, ues a default alias name that is not likely to conflict with + // For un-aliased subqueries, use a default alias name that is not likely to conflict with // normal subquery names, so that parent operators can only access the columns in subquery by // unqualified names. Users can still use this special qualifier to access columns if they // know it, but that's not recommended. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 33e29c704f53..3e4f61943159 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -683,20 +683,15 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext Seq(1).toDF("c1").createOrReplaceTempView("t1") Seq(2).toDF("c1").createOrReplaceTempView("t2") - sql( + val sql1 = """ |SELECT * FROM t1 |WHERE |NOT EXISTS (SELECT * FROM t2) - """.stripMargin).cache() + """.stripMargin + sql(sql1).cache() - val cachedDs = - sql( - """ - |SELECT * FROM t1 - |WHERE - |NOT EXISTS (SELECT * FROM t2) - """.stripMargin) + val cachedDs = sql(sql1) assert(getNumInMemoryRelations(cachedDs) == 1) // Additional predicate in the subquery plan should cause a cache miss @@ -717,20 +712,15 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext Seq(1).toDF("c1").createOrReplaceTempView("t2") // Simple correlated predicate in subquery - sql( + val sqlText = """ |SELECT * FROM t1 |WHERE |t1.c1 in (SELECT t2.c1 FROM t2 where t1.c1 = t2.c1) - """.stripMargin).cache() + """.stripMargin + sql(sqlText).cache() - val cachedDs = - sql( - """ - |SELECT * FROM t1 - |WHERE - |t1.c1 in (SELECT t2.c1 FROM t2 where t1.c1 = t2.c1) - """.stripMargin) + val cachedDs = sql(sqlText) assert(getNumInMemoryRelations(cachedDs) == 1) } } @@ -741,22 +731,16 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext spark.catalog.cacheTable("t1") // underlying table t1 is cached as well as the query that refers to it. - val ds = - sql( + val sqlText = """ |SELECT * FROM t1 |WHERE |NOT EXISTS (SELECT * FROM t1) - """.stripMargin) + """.stripMargin + val ds = sql(sqlText) assert(getNumInMemoryRelations(ds) == 2) - val cachedDs = - sql( - """ - |SELECT * FROM t1 - |WHERE - |NOT EXISTS (SELECT * FROM t1) - """.stripMargin).cache() + val cachedDs = sql(sqlText).cache() assert(getNumInMemoryTablesRecursively(cachedDs.queryExecution.sparkPlan) == 3) } } @@ -769,45 +753,31 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext Seq(1).toDF("c1").createOrReplaceTempView("t4") // Nested predicate subquery - sql( + val sql1 = """ |SELECT * FROM t1 |WHERE |c1 IN (SELECT c1 FROM t2 WHERE c1 IN (SELECT c1 FROM t3 WHERE c1 = 1)) - """.stripMargin).cache() + """.stripMargin + sql(sql1).cache() - val cachedDs = - sql( - """ - |SELECT * FROM t1 - |WHERE - |c1 IN (SELECT c1 FROM t2 WHERE c1 IN (SELECT c1 FROM t3 WHERE c1 = 1)) - """.stripMargin) + val cachedDs = sql(sql1) assert(getNumInMemoryRelations(cachedDs) == 1) // Scalar subquery and predicate subquery - sql( + val sql2 = """ - |SELECT * FROM (SELECT c1, max(c1) as c1 FROM t1 GROUP BY c1) + |SELECT * FROM (SELECT c1, max(c1) FROM t1 GROUP BY c1) |WHERE |c1 = (SELECT max(c1) FROM t2 GROUP BY c1) |OR |EXISTS (SELECT c1 FROM t3) |OR |c1 IN (SELECT c1 FROM t4) - """.stripMargin).cache() + """.stripMargin + sql(sql2).cache() - val cachedDs2 = - sql( - """ - |SELECT * FROM (SELECT c1, max(c1) as c1 FROM t1 GROUP BY c1) - |WHERE - |c1 = (SELECT max(c1) FROM t2 GROUP BY c1) - |OR - |EXISTS (SELECT c1 FROM t3) - |OR - |c1 IN (SELECT c1 FROM t4) - """.stripMargin) + val cachedDs2 = sql(sql2) assert(getNumInMemoryRelations(cachedDs2) == 1) } } From 822932c9fb3cafe2971475c242c9c0c5c17c231e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 7 Jul 2017 15:52:19 +0800 Subject: [PATCH 3/3] address comments --- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 +- .../test/resources/sql-tests/results/string-functions.sql.out | 4 ++-- .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a847c4c263e3..4d725904bc9b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -756,7 +756,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { // normal subquery names, so that parent operators can only access the columns in subquery by // unqualified names. Users can still use this special qualifier to access columns if they // know it, but that's not recommended. - "_auto_generated_subquery_name" + "__auto_generated_subquery_name" } else { ctx.strictIdentifier.getText } diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index a84ec4291998..b0ae9d775d96 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -36,14 +36,14 @@ struct -- !query 3 output == Parsed Logical Plan == 'Project [concat(concat(concat('col1, 'col2), 'col3), 'col4) AS col#x] -+- 'SubqueryAlias _auto_generated_subquery_name ++- 'SubqueryAlias __auto_generated_subquery_name +- 'Project ['id AS col1#x, 'id AS col2#x, 'id AS col3#x, 'id AS col4#x] +- 'UnresolvedTableValuedFunction range, [10] == Analyzed Logical Plan == col: string Project [concat(concat(concat(cast(col1#xL as string), cast(col2#xL as string)), cast(col3#xL as string)), cast(col4#xL as string)) AS col#x] -+- SubqueryAlias _auto_generated_subquery_name ++- SubqueryAlias __auto_generated_subquery_name +- Project [id#xL AS col1#xL, id#xL AS col2#xL, id#xL AS col3#xL, id#xL AS col4#xL] +- Range (0, 10, step=1, splits=None) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index e5e8a4369155..472ff7385b19 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2646,9 +2646,9 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { val e = intercept[AnalysisException](sql("SELECT v.i from (SELECT i FROM v)")) assert(e.message == - "cannot resolve '`v.i`' given input columns: [_auto_generated_subquery_name.i]") + "cannot resolve '`v.i`' given input columns: [__auto_generated_subquery_name.i]") - checkAnswer(sql("SELECT _auto_generated_subquery_name.i from (SELECT i FROM v)"), Row(1)) + checkAnswer(sql("SELECT __auto_generated_subquery_name.i from (SELECT i FROM v)"), Row(1)) } } }