From cff4becc1179d2c980e0b5b93ea26c31100824ee Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Tue, 2 Jul 2019 08:06:48 +0200 Subject: [PATCH 1/5] [SPARK-19799][SQL] Support WITH clause in subqueries --- .../spark/sql/catalyst/parser/SqlBase.g4 | 4 +- .../sql/catalyst/analysis/Analyzer.scala | 31 --- .../catalyst/analysis/CTESubstitution.scala | 52 +++++ .../sql/catalyst/parser/AstBuilder.scala | 4 +- .../test/resources/sql-tests/inputs/cte.sql | 113 ++++++++++ .../resources/sql-tests/results/cte.sql.out | 198 +++++++++++++++++- 6 files changed, 361 insertions(+), 41 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index aff1415de4e8..54a61193f637 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -412,7 +412,7 @@ queryPrimary | fromStatement #fromStmt | TABLE multipartIdentifier #table | inlineTable #inlineTableDefault1 - | '(' queryNoWith ')' #subquery + | '(' query ')' #subquery ; sortItem @@ -583,7 +583,7 @@ identifierComment relationPrimary : multipartIdentifier sample? tableAlias #tableName - | '(' queryNoWith ')' sample? tableAlias #aliasedQuery + | '(' query ')' sample? tableAlias #aliasedQuery | '(' relation ')' sample? 
tableAlias #aliasedRelation | inlineTable #inlineTableDefault2 | functionTable #tableValuedFunction diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 60517f11a249..b799b39c2f36 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -211,37 +211,6 @@ class Analyzer( CleanupAliases) ) - /** - * Analyze cte definitions and substitute child plan with analyzed cte definitions. - */ - object CTESubstitution extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { - case With(child, relations) => - // substitute CTE expressions right-to-left to resolve references to previous CTEs: - // with a as (select * from t), b as (select * from a) select * from b - relations.foldRight(child) { - case ((cteName, ctePlan), currentPlan) => - substituteCTE(currentPlan, cteName, ctePlan) - } - case other => other - } - - def substituteCTE(plan: LogicalPlan, cteName: String, ctePlan: LogicalPlan): LogicalPlan = { - plan resolveOperatorsUp { - case UnresolvedRelation(Seq(table)) if resolver(cteName, table) => - ctePlan - case u: UnresolvedRelation => - u - case other => - // This cannot be done in ResolveSubquery because ResolveSubquery does not know the CTE. - other transformExpressions { - case e: SubqueryExpression => - e.withNewPlan(substituteCTE(e.plan, cteName, ctePlan)) - } - } - } - } - /** * Substitute child plan with WindowSpecDefinitions. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala new file mode 100644 index 000000000000..089106425eb9 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, With} +import org.apache.spark.sql.catalyst.rules.Rule + +/** + * Analyze WITH nodes and substitute child plan with CTE definitions. 
+ */ +object CTESubstitution extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { + case With(child, relations) => + // substitute CTE expressions right-to-left to resolve references to previous CTEs: + // with a as (select * from t), b as (select * from a) select * from b + relations.foldRight(child) { + case ((cteName, ctePlan), currentPlan) => substituteCTE(currentPlan, cteName, ctePlan) + } + case other => other + } + + private def substituteCTE( + plan: LogicalPlan, + cteName: String, + ctePlan: LogicalPlan): LogicalPlan = { + plan resolveOperatorsUp { + case UnresolvedRelation(Seq(table)) if plan.conf.resolver(cteName, table) => ctePlan + + case o => + // This cannot be done in ResolveSubquery because ResolveSubquery does not know the CTE. + o transformExpressions { + case e: SubqueryExpression => e.withNewPlan(substituteCTE(e.plan, cteName, ctePlan)) + } + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f08cb2a780fe..45992d83e495 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -890,7 +890,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Create a logical plan for a sub-query. 
*/ override def visitSubquery(ctx: SubqueryContext): LogicalPlan = withOrigin(ctx) { - plan(ctx.queryNoWith) + plan(ctx.query) } /** @@ -978,7 +978,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * }}} */ override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) { - val relation = plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample) + val relation = plan(ctx.query).optionalMap(ctx.sample)(withSample) if (ctx.tableAlias.strictIdentifier == null) { // For un-aliased subqueries, use a default alias name that is not likely to conflict with // normal subquery names, so that parent operators can only access the columns in subquery by diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql index 822c5c4660e3..b170bd69d523 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql @@ -28,6 +28,119 @@ FROM CTE1 t1 WITH t(x) AS (SELECT 1) SELECT * FROM t WHERE x = 1; +-- CTE in CTE definition +WITH t as ( + WITH t2 AS (SELECT 1) + SELECT * FROM t2 +) +SELECT * FROM t; + +-- CTE in subquery +SELECT max(c) FROM ( + WITH t(c) AS (SELECT 1) + SELECT * FROM t +); + +-- CTE in subquery expression +SELECT ( + WITH t AS (SELECT 1) + SELECT * FROM t +); + +-- CTE in CTE definition shadows outer +WITH + t AS (SELECT 1), + t2 AS ( + WITH t AS (SELECT 2) + SELECT * FROM t + ) +SELECT * FROM t2; + +-- CTE in CTE definition shadows outer 2" +WITH + t(c) AS (SELECT 1), + t2 AS ( + SELECT ( + SELECT max(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t + ) + ) + ) +SELECT * FROM t2; + +-- CTE in CTE definition shadows outer 3 +WITH + t AS (SELECT 1), + t2 AS ( + WITH t AS (SELECT 2), + t2 AS ( + WITH t AS (SELECT 3) + SELECT * FROM t + ) + SELECT * FROM t2 + ) +SELECT * FROM t2; + +-- CTE in subquery shadows outer +WITH t(c) AS (SELECT 1) +SELECT max(c) FROM ( + WITH t(c) AS (SELECT 2) + 
SELECT * FROM t +); + +-- CTE in subquery shadows outer 2 +WITH t(c) AS (SELECT 1) +SELECT sum(c) FROM ( + SELECT max(c) AS c FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t + ) +); + +-- CTE in subquery shadows outer 3 +WITH t(c) AS (SELECT 1) +SELECT sum(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT max(c) AS c FROM ( + WITH t(c) AS (SELECT 3) + SELECT * FROM t + ) +); + +-- CTE in subquery expression shadows outer +WITH t AS (SELECT 1) +SELECT ( + WITH t AS (SELECT 2) + SELECT * FROM t +); + +-- CTE in subquery expression shadows outer 2 +WITH t AS (SELECT 1) +SELECT ( + SELECT ( + WITH t AS (SELECT 2) + SELECT * FROM t + ) +); + +-- CTE in subquery expression shadows outer 3 +WITH t AS (SELECT 1) +SELECT ( + WITH t AS (SELECT 2) + SELECT ( + WITH t AS (SELECT 3) + SELECT * FROM t + ) +); + +-- no infinite recursion during CTE substitution +WITH r AS (SELECT * FROM r) +SELECT * FROM r; + +WITH r AS (SELECT (SELECT * FROM r)) +SELECT * FROM r; + -- Clean up DROP VIEW IF EXISTS t; DROP VIEW IF EXISTS t2; diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index f8ccecbc46f4..1694302ec01c 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 10 +-- Number of queries: 24 -- !query 0 @@ -98,16 +98,202 @@ struct<x:int> -- !query 8 -DROP VIEW IF EXISTS t +WITH t as ( + WITH t2 AS (SELECT 1) + SELECT * FROM t2 +) +SELECT * FROM t -- !query 8 schema -struct<> +struct<1:int> -- !query 8 output - +1 -- !query 9 -DROP VIEW IF EXISTS t2 +SELECT max(c) FROM ( + WITH t(c) AS (SELECT 1) + SELECT * FROM t +) -- !query 9 schema -struct<> +struct<max(c):int> -- !query 9 output +1 + + +-- !query 10 +SELECT ( + WITH t AS (SELECT 1) + SELECT * FROM t +) +-- !query 10 schema +struct<scalarsubquery():int> +-- !query 10 output +1 + + +-- !query 11 +WITH + t AS (SELECT 1), + t2 AS ( 
+ WITH t AS (SELECT 2) + SELECT * FROM t + ) +SELECT * FROM t2 +-- !query 11 schema +struct<1:int> +-- !query 11 output +1 + + +-- !query 12 +WITH + t(c) AS (SELECT 1), + t2 AS ( + SELECT ( + SELECT max(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t + ) + ) + ) +SELECT * FROM t2 +-- !query 12 schema +struct<scalarsubquery():int> +-- !query 12 output +1 + + +-- !query 13 +WITH + t AS (SELECT 1), + t2 AS ( + WITH t AS (SELECT 2), + t2 AS ( + WITH t AS (SELECT 3) + SELECT * FROM t + ) + SELECT * FROM t2 + ) +SELECT * FROM t2 +-- !query 13 schema +struct<2:int> +-- !query 13 output +2 + + +-- !query 14 +WITH t(c) AS (SELECT 1) +SELECT max(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t +) +-- !query 14 schema +struct<max(c):int> +-- !query 14 output +2 + + +-- !query 15 +WITH t(c) AS (SELECT 1) +SELECT sum(c) FROM ( + SELECT max(c) AS c FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t + ) +) +-- !query 15 schema +struct<sum(c):bigint> +-- !query 15 output +2 + + +-- !query 16 +WITH t(c) AS (SELECT 1) +SELECT sum(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT max(c) AS c FROM ( + WITH t(c) AS (SELECT 3) + SELECT * FROM t + ) +) +-- !query 16 schema +struct<sum(c):bigint> +-- !query 16 output +3 + + +-- !query 17 +WITH t AS (SELECT 1) +SELECT ( + WITH t AS (SELECT 2) + SELECT * FROM t +) +-- !query 17 schema +struct<scalarsubquery():int> +-- !query 17 output +1 + + +-- !query 18 +WITH t AS (SELECT 1) +SELECT ( + SELECT ( + WITH t AS (SELECT 2) + SELECT * FROM t + ) +) +-- !query 18 schema +struct<scalarsubquery():int> +-- !query 18 output +1 + + +-- !query 19 +WITH t AS (SELECT 1) +SELECT ( + WITH t AS (SELECT 2) + SELECT ( + WITH t AS (SELECT 3) + SELECT * FROM t + ) +) +-- !query 19 schema +struct<scalarsubquery():int> +-- !query 19 output +1 + + +-- !query 20 +WITH r AS (SELECT * FROM r) +SELECT * FROM r +-- !query 20 schema +struct<> +-- !query 20 output +org.apache.spark.sql.AnalysisException +Table or view not found: r; line 1 pos 25 + + +-- !query 21 +WITH r AS (SELECT (SELECT * FROM r)) +SELECT * FROM r +-- !query 21 schema +struct<> +-- !query 21 output 
+org.apache.spark.sql.AnalysisException +Table or view not found: r; line 1 pos 33 + + +-- !query 22 +DROP VIEW IF EXISTS t +-- !query 22 schema +struct<> +-- !query 22 output + + + +-- !query 23 +DROP VIEW IF EXISTS t2 +-- !query 23 schema +struct<> +-- !query 23 output From 85e39d498e8f1fd08a22bb856b5e736ce652c24a Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Tue, 2 Jul 2019 09:09:09 +0200 Subject: [PATCH 2/5] minor fix --- sql/core/src/test/resources/sql-tests/inputs/cte.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql index b170bd69d523..afbc6c1d4ae4 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql @@ -56,7 +56,7 @@ WITH ) SELECT * FROM t2; --- CTE in CTE definition shadows outer 2" +-- CTE in CTE definition shadows outer 2 WITH t(c) AS (SELECT 1), t2 AS ( From 837c7763999b1aff30c87b2a25e3dc8ed633abf8 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Wed, 3 Jul 2019 09:00:18 +0200 Subject: [PATCH 3/5] fix review findings --- .../spark/sql/catalyst/parser/SqlBase.g4 | 6 +-- .../sql/catalyst/analysis/Analyzer.scala | 31 +++++++++++ .../catalyst/analysis/CTESubstitution.scala | 52 ------------------- .../sql/catalyst/parser/AstBuilder.scala | 6 +-- 4 files changed, 33 insertions(+), 62 deletions(-) delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 54a61193f637..a7b7535d7336 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -278,7 +278,7 @@ locationSpec ; query - : ctes? queryNoWith + : ctes? 
queryTerm queryOrganization ; insertInto @@ -380,10 +380,6 @@ dmlStatementNoWith | fromClause multiInsertQueryBody+ #multiInsertQuery ; -queryNoWith - : queryTerm queryOrganization - ; - queryOrganization : (ORDER BY order+=sortItem (',' order+=sortItem)*)? (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)? diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b799b39c2f36..203dcd3e66e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -211,6 +211,37 @@ class Analyzer( CleanupAliases) ) + /** + * Analyze cte definitions and substitute child plan with analyzed cte definitions. + */ + object CTESubstitution extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { + case With(child, relations) => + // substitute CTE expressions right-to-left to resolve references to previous CTEs: + // with a as (select * from t), b as (select * from a) select * from b + relations.foldRight(child) { + case ((cteName, ctePlan), currentPlan) => + substituteCTE(currentPlan, cteName, ctePlan) + } + case other => other + } + + private def substituteCTE( + plan: LogicalPlan, + cteName: String, + ctePlan: LogicalPlan): LogicalPlan = + plan resolveOperatorsUp { + case UnresolvedRelation(Seq(table)) if resolver(cteName, table) => + ctePlan + case other => + // This cannot be done in ResolveSubquery because ResolveSubquery does not know the CTE. + other transformExpressions { + case e: SubqueryExpression => + e.withNewPlan(substituteCTE(e.plan, cteName, ctePlan)) + } + } + } + /** * Substitute child plan with WindowSpecDefinitions. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala deleted file mode 100644 index 089106425eb9..000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.analysis - -import org.apache.spark.sql.catalyst.expressions.SubqueryExpression -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, With} -import org.apache.spark.sql.catalyst.rules.Rule - -/** - * Analyze WITH nodes and substitute child plan with CTE definitions. 
- */ -object CTESubstitution extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { - case With(child, relations) => - // substitute CTE expressions right-to-left to resolve references to previous CTEs: - // with a as (select * from t), b as (select * from a) select * from b - relations.foldRight(child) { - case ((cteName, ctePlan), currentPlan) => substituteCTE(currentPlan, cteName, ctePlan) - } - case other => other - } - - private def substituteCTE( - plan: LogicalPlan, - cteName: String, - ctePlan: LogicalPlan): LogicalPlan = { - plan resolveOperatorsUp { - case UnresolvedRelation(Seq(table)) if plan.conf.resolver(cteName, table) => ctePlan - - case o => - // This cannot be done in ResolveSubquery because ResolveSubquery does not know the CTE. - o transformExpressions { - case e: SubqueryExpression => e.withNewPlan(substituteCTE(e.plan, cteName, ctePlan)) - } - } - } -} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 45992d83e495..5eef8dbdfbff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -111,7 +111,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Create a top-level plan with Common Table Expressions. 
*/ override def visitQuery(ctx: QueryContext): LogicalPlan = withOrigin(ctx) { - val query = plan(ctx.queryNoWith) + val query = plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses) // Apply CTEs query.optionalMap(ctx.ctes)(withCTE) @@ -175,10 +175,6 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } } - override def visitQueryNoWith(ctx: QueryNoWithContext): LogicalPlan = withOrigin(ctx) { - plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses) - } - /** * Create a named logical plan. * From d0c57c8994da0a1cb9c6bd479ff26bafad325b14 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Wed, 3 Jul 2019 11:09:04 +0200 Subject: [PATCH 4/5] fix review findings 2 --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 203dcd3e66e7..e7f8ca696007 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -229,7 +229,7 @@ class Analyzer( private def substituteCTE( plan: LogicalPlan, cteName: String, - ctePlan: LogicalPlan): LogicalPlan = + ctePlan: LogicalPlan): LogicalPlan = { plan resolveOperatorsUp { case UnresolvedRelation(Seq(table)) if resolver(cteName, table) => ctePlan @@ -240,6 +240,7 @@ class Analyzer( e.withNewPlan(substituteCTE(e.plan, cteName, ctePlan)) } } + } } /** From 9ec6eaf3f1b992ae3c0101464483f3a7c9c8f70a Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Wed, 3 Jul 2019 12:00:51 +0200 Subject: [PATCH 5/5] remove duplicate test --- .../test/resources/sql-tests/inputs/cte.sql | 10 +- .../resources/sql-tests/results/cte.sql.out | 138 ++++++++---------- 2 files changed, 67 insertions(+), 81 deletions(-) diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql index afbc6c1d4ae4..ac448eb2b27b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql @@ -4,6 +4,9 @@ create temporary view t2 as select * from values 0, 1 as t(id); -- WITH clause should not fall into infinite loop by referencing self WITH s AS (SELECT 1 FROM s) SELECT * FROM s; +WITH r AS (SELECT (SELECT * FROM r)) +SELECT * FROM r; + -- WITH clause should reference the base table WITH t AS (SELECT 1 FROM t) SELECT * FROM t; @@ -134,13 +137,6 @@ SELECT ( ) ); --- no infinite recursion during CTE substitution -WITH r AS (SELECT * FROM r) -SELECT * FROM r; - -WITH r AS (SELECT (SELECT * FROM r)) -SELECT * FROM r; - -- Clean up DROP VIEW IF EXISTS t; DROP VIEW IF EXISTS t2; diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index 1694302ec01c..b89e29fd08dc 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 24 +-- Number of queries: 23 -- !query 0 @@ -28,36 +28,46 @@ Table or view not found: s; line 1 pos 25 -- !query 3 -WITH t AS (SELECT 1 FROM t) SELECT * FROM t +WITH r AS (SELECT (SELECT * FROM r)) +SELECT * FROM r -- !query 3 schema -struct<1:int> +struct<> -- !query 3 output +org.apache.spark.sql.AnalysisException +Table or view not found: r; line 1 pos 33 + + +-- !query 4 +WITH t AS (SELECT 1 FROM t) SELECT * FROM t +-- !query 4 schema +struct<1:int> +-- !query 4 output 1 1 1 --- !query 4 +-- !query 5 WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2 --- !query 4 schema +-- !query 5 schema struct<> --- !query 4 output +-- !query 5 output org.apache.spark.sql.AnalysisException Table or view not found: s2; line 
1 pos 26 --- !query 5 +-- !query 6 WITH t1 AS (SELECT * FROM t2), t2 AS (SELECT 2 FROM t1) SELECT * FROM t1 cross join t2 --- !query 5 schema +-- !query 6 schema struct<id:int,2:int> --- !query 5 output +-- !query 6 output 0 2 0 2 1 2 1 2 --- !query 6 +-- !query 7 WITH CTE1 AS ( SELECT b.id AS id FROM T2 a @@ -67,9 +77,9 @@ SELECT t1.id AS c1, t2.id AS c2 FROM CTE1 t1 CROSS JOIN CTE1 t2 --- !query 6 schema +-- !query 7 schema struct<c1:int,c2:int> --- !query 6 output +-- !query 7 output 0 0 0 0 0 0 @@ -88,50 +98,50 @@ struct<c1:int,c2:int> 1 1 --- !query 7 +-- !query 8 WITH t(x) AS (SELECT 1) SELECT * FROM t WHERE x = 1 --- !query 7 schema +-- !query 8 schema struct<x:int> --- !query 7 output +-- !query 8 output 1 --- !query 8 +-- !query 9 WITH t as ( WITH t2 AS (SELECT 1) SELECT * FROM t2 ) SELECT * FROM t --- !query 8 schema +-- !query 9 schema struct<1:int> --- !query 8 output +-- !query 9 output 1 --- !query 9 +-- !query 10 SELECT max(c) FROM ( WITH t(c) AS (SELECT 1) SELECT * FROM t ) --- !query 9 schema +-- !query 10 schema struct<max(c):int> --- !query 9 output +-- !query 10 output 1 --- !query 10 +-- !query 11 SELECT ( WITH t AS (SELECT 1) SELECT * FROM t ) --- !query 10 schema +-- !query 11 schema struct<scalarsubquery():int> --- !query 10 output +-- !query 11 output 1 --- !query 11 +-- !query 12 WITH t AS (SELECT 1), t2 AS ( @@ -139,13 +149,13 @@ WITH SELECT * FROM t ) SELECT * FROM t2 --- !query 11 schema +-- !query 12 schema struct<1:int> --- !query 11 output +-- !query 12 output 1 --- !query 12 +-- !query 13 WITH t(c) AS (SELECT 1), t2 AS ( @@ -157,13 +167,13 @@ WITH ) ) SELECT * FROM t2 --- !query 12 schema +-- !query 13 schema struct<scalarsubquery():int> --- !query 12 output +-- !query 13 output 1 --- !query 13 +-- !query 14 WITH t AS (SELECT 1), t2 AS ( @@ -175,25 +185,25 @@ WITH SELECT * FROM t2 ) SELECT * FROM t2 --- !query 13 schema +-- !query 14 schema struct<2:int> --- !query 13 output +-- !query 14 output 2 --- !query 14 +-- !query 15 WITH t(c) AS (SELECT 1) SELECT max(c) FROM ( WITH t(c) AS (SELECT 2) SELECT * FROM t ) --- !query 14 schema +-- 
!query 15 schema struct<max(c):int> --- !query 14 output +-- !query 15 output 2 --- !query 15 +-- !query 16 WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( SELECT max(c) AS c FROM ( @@ -201,13 +211,13 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 15 schema +-- !query 16 schema struct<sum(c):bigint> --- !query 15 output +-- !query 16 output 2 --- !query 16 +-- !query 17 WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( WITH t(c) AS (SELECT 2) @@ -216,25 +226,25 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 16 schema +-- !query 17 schema struct<sum(c):bigint> --- !query 16 output +-- !query 17 output 3 --- !query 17 +-- !query 18 WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) SELECT * FROM t ) --- !query 17 schema +-- !query 18 schema struct<scalarsubquery():int> --- !query 17 output +-- !query 18 output 1 --- !query 18 +-- !query 19 WITH t AS (SELECT 1) SELECT ( SELECT ( @@ -242,13 +252,13 @@ SELECT ( SELECT * FROM t ) ) --- !query 18 schema +-- !query 19 schema struct<scalarsubquery():int> --- !query 18 output +-- !query 19 output 1 --- !query 19 +-- !query 20 WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) @@ -257,43 +267,23 @@ SELECT ( SELECT * FROM t ) ) --- !query 19 schema -struct<scalarsubquery():int> --- !query 19 output -1 - - --- !query 20 -WITH r AS (SELECT * FROM r) -SELECT * FROM r -- !query 20 schema -struct<> +struct<scalarsubquery():int> -- !query 20 output -org.apache.spark.sql.AnalysisException -Table or view not found: r; line 1 pos 25 +1 -- !query 21 -WITH r AS (SELECT (SELECT * FROM r)) -SELECT * FROM r +DROP VIEW IF EXISTS t -- !query 21 schema struct<> -- !query 21 output -org.apache.spark.sql.AnalysisException -Table or view not found: r; line 1 pos 33 + -- !query 22 -DROP VIEW IF EXISTS t +DROP VIEW IF EXISTS t2 -- !query 22 schema struct<> -- !query 22 output - - --- !query 23 -DROP VIEW IF EXISTS t2 --- !query 23 schema -struct<> --- !query 23 output -