diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 5eef8dbdfbff..6c5ad55e88be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -129,7 +129,12 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging (namedQuery.alias, namedQuery) } // Check for duplicate names. - checkDuplicateKeys(ctes, ctx) + val duplicates = ctes.groupBy(_._1).filter(_._2.size > 1).keys + if (duplicates.nonEmpty) { + throw new ParseException( + s"CTE definition can't have duplicate names: ${duplicates.mkString("'", "', '", "'")}.", + ctx) + } With(plan, ctes) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index d48da4ab3216..fb245eef5e4b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -100,7 +100,7 @@ class PlanParserSuite extends AnalysisTest { "cte2" -> ((table("cte1").select(star()), Seq.empty)))) intercept( "with cte1 (select 1), cte1 as (select 1 from cte1) select * from cte1", - "Found duplicate keys 'cte1'") + "CTE definition can't have duplicate names: 'cte1'.") } test("simple select query") { diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql index ac448eb2b27b..d0e145c35a9f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql @@ -31,6 +31,24 @@ FROM CTE1 t1 WITH t(x) AS (SELECT 1) SELECT * FROM t WHERE x = 1; +-- CTE with multiple column aliases +WITH t(x, y) AS (SELECT 1, 2) +SELECT * FROM t WHERE x = 1 AND y = 2; + +-- CTE with duplicate column aliases +WITH t(x, x) AS (SELECT 1, 2) +SELECT * FROM t; + +-- CTE with empty column alias list is not allowed +WITH t() AS (SELECT 1) +SELECT * FROM t; + +-- CTEs with duplicate names are not allowed +WITH + t(x) AS (SELECT 1), + t(x) AS (SELECT 2) +SELECT * FROM t; + -- CTE in CTE definition WITH t as ( WITH t2 AS (SELECT 1) diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index b89e29fd08dc..9e90908d92fa 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 23 +-- Number of queries: 27 -- !query 0 @@ -108,40 +108,94 @@ struct -- !query 9 +WITH t(x, y) AS (SELECT 1, 2) +SELECT * FROM t WHERE x = 1 AND y = 2 +-- !query 9 schema +struct +-- !query 9 output +1 2 + + +-- !query 10 +WITH t(x, x) AS (SELECT 1, 2) +SELECT * FROM t +-- !query 10 schema +struct +-- !query 10 output +1 2 + + +-- !query 11 +WITH t() AS (SELECT 1) +SELECT * FROM t +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input 'WITH t()'(line 1, pos 7) + +== SQL == +WITH t() AS (SELECT 1) +-------^^^ +SELECT * FROM t + + +-- !query 12 +WITH + t(x) AS (SELECT 1), + t(x) AS (SELECT 2) +SELECT * FROM t +-- !query 12 schema +struct<> +-- !query 12 output +org.apache.spark.sql.catalyst.parser.ParseException + +CTE definition can't have duplicate names: 't'.(line 1, pos 0) + +== SQL == +WITH +^^^ + t(x) AS (SELECT 1), + t(x) AS (SELECT 2) +SELECT * FROM t + + +-- !query 13 WITH t as ( WITH t2 AS (SELECT 1) SELECT * FROM t2 ) SELECT * FROM t --- !query 9 schema +-- !query 13 schema struct<1:int> --- !query 9 output +-- !query 13 output 1 --- !query 10 +-- !query 14 SELECT max(c) FROM ( WITH t(c) AS (SELECT 1) SELECT * FROM t ) --- !query 10 schema +-- !query 14 schema struct --- !query 10 output +-- !query 14 output 1 --- !query 11 +-- !query 15 SELECT ( WITH t AS (SELECT 1) SELECT * FROM t ) --- !query 11 schema +-- !query 15 schema struct --- !query 11 output +-- !query 15 output 1 --- !query 12 +-- !query 16 WITH t AS (SELECT 1), t2 AS ( @@ -149,13 +203,13 @@ WITH SELECT * FROM t ) SELECT * FROM t2 --- !query 12 schema +-- !query 16 schema struct<1:int> --- !query 12 output +-- !query 16 output 1 --- !query 13 +-- !query 17 WITH t(c) AS (SELECT 1), t2 AS ( @@ -167,13 +221,13 @@ WITH ) ) SELECT * FROM t2 --- !query 13 schema +-- !query 17 schema struct --- !query 13 output +-- !query 17 output 1 --- !query 14 +-- !query 18 WITH t AS (SELECT 1), t2 AS ( @@ -185,25 +239,25 @@ WITH SELECT * FROM t2 ) SELECT * FROM t2 --- !query 14 schema +-- !query 18 schema struct<2:int> --- !query 14 output +-- !query 18 output 2 --- !query 15 +-- !query 19 WITH t(c) AS (SELECT 1) SELECT max(c) FROM ( WITH t(c) AS (SELECT 2) SELECT * FROM t ) --- !query 15 schema +-- !query 19 schema struct --- !query 15 output +-- !query 19 output 2 --- !query 16 +-- !query 20 WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( SELECT max(c) AS c FROM ( @@ -211,13 +265,13 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 16 schema +-- !query 20 schema struct --- !query 16 output +-- !query 20 output 2 --- !query 17 +-- !query 21 WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( WITH t(c) AS (SELECT 2) @@ -226,25 +280,25 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 17 schema +-- !query 21 schema struct --- !query 17 output +-- !query 21 output 3 --- !query 18 +-- !query 22 WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) SELECT * FROM t ) --- !query 18 schema +-- !query 22 schema struct --- !query 18 output +-- !query 22 output 1 --- !query 19 +-- !query 23 WITH t AS (SELECT 1) SELECT ( SELECT ( @@ -252,13 +306,13 @@ SELECT ( SELECT * FROM t ) ) --- !query 19 schema +-- !query 23 schema struct --- !query 19 output +-- !query 23 output 1 --- !query 20 +-- !query 24 WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) @@ -267,23 +321,23 @@ SELECT ( SELECT * FROM t ) ) --- !query 20 schema +-- !query 24 schema struct --- !query 20 output +-- !query 24 output 1 --- !query 21 +-- !query 25 DROP VIEW IF EXISTS t --- !query 21 schema +-- !query 25 schema struct<> --- !query 21 output +-- !query 25 output --- !query 22 +-- !query 26 DROP VIEW IF EXISTS t2 --- !query 22 schema +-- !query 26 schema struct<> --- !query 22 output +-- !query 26 output